Skip to content

Commit 1754d7c

Browse files
benbaumann95 authored and rebeccaoneill committed
Add openai integration for analysing documents
1 parent 29e40a4 commit 1754d7c

9 files changed

+172
-2
lines changed

Diff for: Gemfile

+1
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ gem "rgeo"
4444
gem "rgeo-geojson"
4545
gem "rswag-api"
4646
gem "rswag-ui"
47+
gem "ruby-openai"
4748
gem "sidekiq"
4849
gem "sidekiq-scheduler"
4950
gem "sprockets-rails"

Diff for: Gemfile.lock

+9
Original file line numberDiff line numberDiff line change
@@ -259,6 +259,7 @@ GEM
259259
erubi (1.13.1)
260260
et-orbi (1.2.11)
261261
tzinfo
262+
event_stream_parser (1.0.0)
262263
factory_bot (6.4.6)
263264
activesupport (>= 5.0.0)
264265
factory_bot_rails (6.4.3)
@@ -268,6 +269,8 @@ GEM
268269
i18n (>= 1.6, < 2)
269270
faraday (2.9.0)
270271
faraday-net_http (>= 2.0, < 3.2)
272+
faraday-multipart (1.1.0)
273+
multipart-post (~> 2.0)
271274
faraday-net_http (3.1.0)
272275
net-http
273276
ffi (1.17.1)
@@ -390,6 +393,7 @@ GEM
390393
minitest (5.25.4)
391394
msgpack (1.7.2)
392395
multi_test (1.1.0)
396+
multipart-post (2.4.1)
393397
nenv (0.3.0)
394398
net-http (0.4.1)
395399
uri
@@ -568,6 +572,10 @@ GEM
568572
rack (>= 1.1)
569573
rubocop (>= 1.33.0, < 2.0)
570574
rubocop-ast (>= 1.31.1, < 2.0)
575+
ruby-openai (8.0.0)
576+
event_stream_parser (>= 0.3.0, < 2.0.0)
577+
faraday (>= 1)
578+
faraday-multipart (>= 1)
571579
ruby-progressbar (1.13.0)
572580
ruby-vips (2.1.4)
573581
ffi (~> 1.12)
@@ -731,6 +739,7 @@ DEPENDENCIES
731739
rswag-api
732740
rswag-specs
733741
rswag-ui
742+
ruby-openai
734743
selenium-webdriver
735744
sidekiq
736745
sidekiq-scheduler

Diff for: app/controllers/documents_controller.rb

+1-1
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ def create
3838
@document = @planning_application.documents.build(document_params)
3939

4040
if @document.save
41-
flash[:notice] = "#{@document.name} has been uploaded."
41+
flash[:notice] = "#{@document.name} has been uploaded and queued for analysis."
4242
redirect_to planning_application_documents_path
4343
else
4444
render :new

Diff for: app/jobs/document_analyser_job.rb

+8
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
# Background job that runs DocumentAnalyserService for a single document.
#
# Any StandardError raised by the job is retried, for up to 5 attempts spaced
# one minute apart with no jitter. A document that no longer exists when the
# job is picked up cannot be analysed, so that case is discarded instead.
class DocumentAnalyserJob < ApplicationJob
  queue_as :high_priority

  retry_on(StandardError, attempts: 5, wait: 1.minute, jitter: 0)
  # Declared after retry_on on purpose: handlers registered later are matched
  # first, so a deleted document is dropped rather than retried five times.
  discard_on(ActiveJob::DeserializationError)

  # @param document [Document] the record whose attached file is analysed
  def perform(document)
    DocumentAnalyserService.new(document).call
  end
end

Diff for: app/models/document.rb

+7
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,8 @@ class NotArchiveableError < StandardError; end
5656
before_update :reset_replacement_document_validation_request_update_counter!, if: :owner_is_validation_request?
5757
after_update :audit_updated!
5858

59+
# Enqueue the analysis only after the creating transaction has committed, so
# the background worker never looks up a row that is not yet visible to it.
after_create_commit :generate_ai_summary
60+
5961
DRAWING_TAGS = %w[
6062
elevations.existing
6163
elevations.proposed
@@ -284,6 +286,11 @@ def tags(key)
284286
end
285287
end
286288

289+
# Queues a DocumentAnalyserJob for this document; nothing is analysed inline,
# the work happens when the background job runs.
def generate_ai_summary = DocumentAnalyserJob.perform_later(self)
293+
287294
def name
288295
file.filename.to_s if file.attached?
289296
end

Diff for: app/services/document_analyser_service.rb

+133
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,133 @@
1+
require "openai"
2+
require "base64"
3+
4+
# Sends a document's attached file to OpenAI and stores the results on the
# document: a short summary (+ai_summary+) and a list of tags (+tags+).
#
# Usage: DocumentAnalyserService.new(document).call
class DocumentAnalyserService
  MODEL = "gpt-4o-mini".freeze
  REQUEST_TIMEOUT = 120
  # Used when the blob carries no content type of its own.
  DEFAULT_CONTENT_TYPE = "image/jpeg".freeze

  SUMMARY_SYSTEM_PROMPT =
    "You are an expert, with experience in UK housing/planning, in interpreting and extracting a concise summary/description of a document".freeze
  SUMMARY_USER_PROMPT =
    "Analyse this image/document and provide a short description/summary of its content in no more than 2 sentences.".freeze
  TAGS_USER_PROMPT = <<~PROMPT.chomp.freeze
    Analyse this image/document and assign relevant document tags from the supplied lists.
    ONLY choose tags from these lists. You may choose more than one tag if relevant. Provide a comma separated list of ONLY the tags.
  PROMPT

  attr_reader :document, :client

  # @param document [Document] record with an attached +file+
  # @param access_token [String, nil] OpenAI API key. Defaults to the
  #   OPENAI_ACCESS_TOKEN environment variable instead of a literal in source.
  #   NOTE(review): the variable name is a suggestion - align it with however
  #   this application manages its secrets.
  def initialize(document, access_token: ENV.fetch("OPENAI_ACCESS_TOKEN", nil))
    @document = document
    @client = OpenAI::Client.new(
      access_token: access_token,
      request_timeout: REQUEST_TIMEOUT
    )
  end

  # Runs the analysis and persists whatever came back in a single update.
  #
  # @return [true, nil] the result of +update!+, or nil when there is no
  #   attached file, the file could not be downloaded, or nothing usable was
  #   returned by the model
  # @raise [StandardError] any failure is logged and re-raised so that the
  #   calling job's +retry_on+ can schedule another attempt (previously the
  #   error was swallowed, which made the retry configuration unreachable)
  def call
    return unless document.file.attached?

    image_url = build_image_url
    return unless image_url

    ai_summary = generate_ai_summary(image_url)
    document_tags = generate_document_tags(image_url)

    attributes = {}
    attributes[:tags] = document_tags if document_tags.any?
    attributes[:ai_summary] = ai_summary unless ai_summary.empty?

    document.update!(attributes) unless attributes.empty?
  rescue StandardError => e
    Rails.logger.error("Error analysing the document with id: #{document.id}: #{e.message}")
    raise
  end

  private

  # Builds the base64 data URL sent to the model, or nil if the download failed.
  def build_image_url
    file_data = download_file
    return unless file_data

    "data:#{content_type};base64,#{Base64.strict_encode64(file_data)}"
  end

  # Reads the blob straight into memory. The previous implementation wrote it
  # to a Tempfile, returned only the path and never removed the file, so the
  # file could be unlinked by GC before it was read and otherwise leaked.
  def download_file
    document.file.blob.download
  rescue StandardError => e
    Rails.logger.error "Failed to download file: #{e.message}"
    nil
  end

  # Content type recorded on the blob, so non-JPEG uploads are labelled correctly.
  def content_type
    type = document.file.blob.content_type.to_s
    type.empty? ? DEFAULT_CONTENT_TYPE : type
  end

  # Asks for a short summary using the summary prompt (the tagging prompt was
  # previously sent for this request as well).
  #
  # @return [String] the stripped summary, empty when the model returned nothing
  def generate_ai_summary(image_url)
    request_completion(SUMMARY_SYSTEM_PROMPT, SUMMARY_USER_PROMPT, image_url).strip
  end

  # Asks for tags and keeps only values present in the predefined lists, so an
  # unexpected answer from the model can never be written to the document.
  #
  # @return [Array<String>] unique, allowed tags in the order they were returned
  def generate_document_tags(image_url)
    reply = request_completion(tags_system_prompt, TAGS_USER_PROMPT, image_url)
    reply.split(",").map(&:strip) & allowed_tags
  end

  # Performs one chat completion with a system prompt and a text + image user
  # message.
  #
  # @return [String] the message content, or "" when the response has none
  def request_completion(system_prompt, user_prompt, image_url)
    response = client.chat(
      parameters: {
        model: MODEL,
        messages: [
          {
            role: "system",
            content: system_prompt
          },
          {
            role: "user",
            content: [
              {
                type: "text",
                text: user_prompt
              },
              {
                type: "image_url",
                image_url: {
                  url: image_url
                }
              }
            ]
          }
        ]
      }
    )

    response.dig("choices", 0, "message", "content").to_s
  end

  # Built lazily so the Document constants are only read when a request is made.
  def tags_system_prompt
    <<~PROMPT.chomp
      You are an AI expert in reviewing and tagging documents with expertise in the UK housing/planning system. When tagging documents use ONLY the following predefined options:
      - **DRAWING TAGS**: #{Document::DRAWING_TAGS.join(', ')}
      - **EVIDENCE TAGS**: #{Document::EVIDENCE_TAGS.join(', ')}
      - **SUPPORTING DOCUMENT TAGS**: #{Document::SUPPORTING_DOCUMENT_TAGS.join(', ')}
    PROMPT
  end

  # Every tag the model is permitted to return.
  def allowed_tags
    [
      Document::DRAWING_TAGS,
      Document::EVIDENCE_TAGS,
      Document::SUPPORTING_DOCUMENT_TAGS
    ].flat_map(&:to_a)
  end
end

Diff for: app/views/documents/edit.html.erb

+6
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,12 @@
7373
<p class="govuk-body">
7474
<%= created_by(@document) %>
7575
</p>
76+
77+
<p class="govuk-body govuk-!-margin-bottom-0">
78+
<strong>AI summary:</strong> <%= @document.ai_summary %>
79+
</p>
80+
81+
7682
</div>
7783
<div class="govuk-grid-column-full">
7884
<%= render "edit_and_upload" %>
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
# Adds a nullable text column, ai_summary, to the documents table.
class AddAiSummaryToDocuments < ActiveRecord::Migration[7.2]
  def change
    change_table :documents do |t|
      t.text :ai_summary
    end
  end
end

Diff for: db/schema.rb

+2-1
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
#
1111
# It's strongly recommended that you check this file into your version control system.
1212

13-
ActiveRecord::Schema[7.2].define(version: 2025_03_17_171204) do
13+
ActiveRecord::Schema[7.2].define(version: 2025_03_19_102739) do
1414
# These are extensions that must be enabled in order to support this database
1515
enable_extension "btree_gin"
1616
enable_extension "plpgsql"
@@ -402,6 +402,7 @@
402402
t.bigint "owner_id"
403403
t.string "tags", default: [], array: true
404404
t.bigint "document_checklist_items_id"
405+
t.text "ai_summary"
405406
t.index ["api_user_id"], name: "ix_documents_on_api_user_id"
406407
t.index ["document_checklist_items_id"], name: "ix_documents_on_document_checklist_items_id"
407408
t.index ["evidence_group_id"], name: "ix_documents_on_evidence_group_id"

0 commit comments

Comments
 (0)