Skip to content

Commit 7c16092

Browse files
committed
Add openai integration for analysing documents
1 parent 29e40a4 commit 7c16092

7 files changed

+122
-1
lines changed

Diff for: Gemfile

+1
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ gem "rgeo"
4444
gem "rgeo-geojson"
4545
gem "rswag-api"
4646
gem "rswag-ui"
47+
gem "ruby-openai"
4748
gem "sidekiq"
4849
gem "sidekiq-scheduler"
4950
gem "sprockets-rails"

Diff for: Gemfile.lock

+9
Original file line numberDiff line numberDiff line change
@@ -259,6 +259,7 @@ GEM
259259
erubi (1.13.1)
260260
et-orbi (1.2.11)
261261
tzinfo
262+
event_stream_parser (1.0.0)
262263
factory_bot (6.4.6)
263264
activesupport (>= 5.0.0)
264265
factory_bot_rails (6.4.3)
@@ -268,6 +269,8 @@ GEM
268269
i18n (>= 1.6, < 2)
269270
faraday (2.9.0)
270271
faraday-net_http (>= 2.0, < 3.2)
272+
faraday-multipart (1.1.0)
273+
multipart-post (~> 2.0)
271274
faraday-net_http (3.1.0)
272275
net-http
273276
ffi (1.17.1)
@@ -390,6 +393,7 @@ GEM
390393
minitest (5.25.4)
391394
msgpack (1.7.2)
392395
multi_test (1.1.0)
396+
multipart-post (2.4.1)
393397
nenv (0.3.0)
394398
net-http (0.4.1)
395399
uri
@@ -568,6 +572,10 @@ GEM
568572
rack (>= 1.1)
569573
rubocop (>= 1.33.0, < 2.0)
570574
rubocop-ast (>= 1.31.1, < 2.0)
575+
ruby-openai (8.0.0)
576+
event_stream_parser (>= 0.3.0, < 2.0.0)
577+
faraday (>= 1)
578+
faraday-multipart (>= 1)
571579
ruby-progressbar (1.13.0)
572580
ruby-vips (2.1.4)
573581
ffi (~> 1.12)
@@ -731,6 +739,7 @@ DEPENDENCIES
731739
rswag-api
732740
rswag-specs
733741
rswag-ui
742+
ruby-openai
734743
selenium-webdriver
735744
sidekiq
736745
sidekiq-scheduler

Diff for: app/jobs/document_analyser_job.rb

+7
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
# Background job that runs DocumentAnalyserService for a single document.
#
# The document is passed as a job argument, so it is looked up again when the
# job is executed.
class DocumentAnalyserJob < ApplicationJob
  queue_as :high_priority

  # If the document has been deleted by the time the job runs, the argument
  # can no longer be loaded and there is nothing left to analyse. Drop the job
  # instead of letting it fail (and be retried) for a record that is gone.
  discard_on ActiveJob::DeserializationError

  # @param document [Document] the document whose attached file is analysed
  def perform(document)
    DocumentAnalyserService.new(document).call
  end
end

Diff for: app/models/document.rb

+7
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,8 @@ class NotArchiveableError < StandardError; end
5656
before_update :reset_replacement_document_validation_request_update_counter!, if: :owner_is_validation_request?
5757
after_update :audit_updated!
5858

59+
# Enqueue the analysis only after the creating transaction has committed, so
# the background worker cannot pick the job up before the record is visible
# to it.
after_create_commit :generate_ai_summary
60+
5961
DRAWING_TAGS = %w[
6062
elevations.existing
6163
elevations.proposed
@@ -284,6 +286,11 @@ def tags(key)
284286
end
285287
end
286288

289+
# Registered as a creation callback: hands this document to
# DocumentAnalyserJob, which performs the OpenAI analysis asynchronously.
def generate_ai_summary
  DocumentAnalyserJob.perform_later(self)
end
293+
287294
def name
288295
file.filename.to_s if file.attached?
289296
end

Diff for: app/services/document_analyser_service.rb

+91
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
require "openai"
2+
require "base64"
3+
4+
# Asks the OpenAI chat API for a short summary of a document's attached file
# and stores the answer in the document's `ai_summary` attribute.
#
# The attachment is downloaded into memory, base64-encoded and sent as a data
# URL alongside a fixed text prompt. The content of the first returned choice
# is persisted when it is present.
class DocumentAnalyserService
  MODEL = "gpt-4o-mini".freeze
  # Passed to OpenAI::Client as `request_timeout`.
  REQUEST_TIMEOUT = 120
  # Used in the data URL when the attachment reports no content type.
  FALLBACK_CONTENT_TYPE = "image/jpeg".freeze
  SYSTEM_PROMPT = "You are an expert, with experience in UK housing/planning, in interpreting and extracting a concise summary/description of a document".freeze
  USER_PROMPT = "Analyse this image/document and provide a short description/summary of its content in no more than 2 sentences.".freeze

  attr_reader :document

  # @param document [Document] record exposing `file`, `id` and `update!`
  # @param client [#chat, nil] optional pre-built client (e.g. a test double);
  #   when omitted an OpenAI::Client is built lazily on first use
  def initialize(document, client: nil)
    @document = document
    @client = client
  end

  # The API client. It is built on first use rather than in the constructor so
  # that a missing access token surfaces inside #call, where it is logged,
  # instead of while the service object is being created.
  #
  # NOTE(review): the variable name OPENAI_ACCESS_TOKEN follows the ruby-openai
  # README; align it with however this project manages secrets.
  #
  # @return [OpenAI::Client, #chat]
  # @raise [KeyError] if no client was injected and the token is not set
  def client
    @client ||= OpenAI::Client.new(
      access_token: ENV.fetch("OPENAI_ACCESS_TOKEN"),
      request_timeout: REQUEST_TIMEOUT
    )
  end

  # Runs the analysis and saves the summary.
  #
  # @return [true] when a summary was generated and saved
  # @return [nil] when no file is attached, the download failed, or the API
  #   returned no usable summary
  # @return [false] when any other error occurred (the error is logged)
  def call
    return unless document.file.attached?

    file_bytes = download_file
    return unless file_bytes

    summary = generate_ai_summary(user_message(build_image(file_bytes)))

    document.update!(ai_summary: summary) if summary.present?
  rescue StandardError => e
    Rails.logger.error("Error analysing the document with id: #{document.id}: #{e.message}")
    false
  end

  private

  # Downloads the attachment straight into memory. The content is needed as a
  # single string for base64 encoding anyway, so no temporary file is written
  # (and none can be left behind or removed too early).
  #
  # @return [String, nil] the file content, or nil if the download failed
  def download_file
    document.file.blob.download
  rescue StandardError => e
    Rails.logger.error "Failed to download file: #{e.message}"
    nil
  end

  # @param file_bytes [String] raw file content
  # @return [String] base64 encoding of the content without line breaks
  def build_image(file_bytes)
    Base64.strict_encode64(file_bytes)
  end

  # Sends the system prompt and the given user content to the chat endpoint.
  #
  # @param user_content [Array<Hash>] content parts built by #user_message
  # @return [String, nil] content of the first choice, if any
  def generate_ai_summary(user_content)
    response = client.chat(
      parameters: {
        model: MODEL,
        messages: [
          { role: "system", content: SYSTEM_PROMPT },
          { role: "user", content: user_content }
        ]
      }
    )

    response.dig("choices", 0, "message", "content")
  end

  # Builds the user message: the text instruction followed by the file as a
  # data URL.
  #
  # @param base64_image [String] base64-encoded file content
  # @return [Array<Hash>]
  def user_message(base64_image)
    [
      { type: "text", text: USER_PROMPT },
      {
        type: "image_url",
        image_url: { url: "data:#{image_content_type};base64,#{base64_image}" }
      }
    ]
  end

  # Content type declared in the data URL: the attachment's own type, so a PNG
  # is not mislabelled as a JPEG, with a fallback when none is recorded.
  #
  # @return [String]
  def image_content_type
    content_type = document.file.content_type.to_s
    content_type.empty? ? FALLBACK_CONTENT_TYPE : content_type
  end
end
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
# Adds the `ai_summary` text column to the documents table.
class AddAiSummaryToDocuments < ActiveRecord::Migration[7.2]
  def change
    change_table :documents do |t|
      t.text :ai_summary
    end
  end
end

Diff for: db/schema.rb

+2-1
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
#
1111
# It's strongly recommended that you check this file into your version control system.
1212

13-
ActiveRecord::Schema[7.2].define(version: 2025_03_17_171204) do
13+
ActiveRecord::Schema[7.2].define(version: 2025_03_19_102739) do
1414
# These are extensions that must be enabled in order to support this database
1515
enable_extension "btree_gin"
1616
enable_extension "plpgsql"
@@ -402,6 +402,7 @@
402402
t.bigint "owner_id"
403403
t.string "tags", default: [], array: true
404404
t.bigint "document_checklist_items_id"
405+
t.text "ai_summary"
405406
t.index ["api_user_id"], name: "ix_documents_on_api_user_id"
406407
t.index ["document_checklist_items_id"], name: "ix_documents_on_document_checklist_items_id"
407408
t.index ["evidence_group_id"], name: "ix_documents_on_evidence_group_id"

0 commit comments

Comments
 (0)