Skip to content

Commit 9f7bd6c

Browse files
committed
Add openai integration for analysing documents
1 parent e8ccb9b commit 9f7bd6c

7 files changed

+121
-0
lines changed

Diff for: Gemfile

+1
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ gem "rgeo"
4444
gem "rgeo-geojson"
4545
gem "rswag-api"
4646
gem "rswag-ui"
47+
gem "ruby-openai"
4748
gem "sidekiq"
4849
gem "sidekiq-scheduler"
4950
gem "sprockets-rails"

Diff for: Gemfile.lock

+9
Original file line numberDiff line numberDiff line change
@@ -259,6 +259,7 @@ GEM
259259
erubi (1.13.1)
260260
et-orbi (1.2.11)
261261
tzinfo
262+
event_stream_parser (1.0.0)
262263
factory_bot (6.4.6)
263264
activesupport (>= 5.0.0)
264265
factory_bot_rails (6.4.3)
@@ -268,6 +269,8 @@ GEM
268269
i18n (>= 1.6, < 2)
269270
faraday (2.9.0)
270271
faraday-net_http (>= 2.0, < 3.2)
272+
faraday-multipart (1.1.0)
273+
multipart-post (~> 2.0)
271274
faraday-net_http (3.1.0)
272275
net-http
273276
ffi (1.17.1)
@@ -391,6 +394,7 @@ GEM
391394
minitest (5.25.4)
392395
msgpack (1.7.2)
393396
multi_test (1.1.0)
397+
multipart-post (2.4.1)
394398
nenv (0.3.0)
395399
net-http (0.4.1)
396400
uri
@@ -571,6 +575,10 @@ GEM
571575
rack (>= 1.1)
572576
rubocop (>= 1.33.0, < 2.0)
573577
rubocop-ast (>= 1.31.1, < 2.0)
578+
ruby-openai (8.0.0)
579+
event_stream_parser (>= 0.3.0, < 2.0.0)
580+
faraday (>= 1)
581+
faraday-multipart (>= 1)
574582
ruby-progressbar (1.13.0)
575583
ruby-vips (2.1.4)
576584
ffi (~> 1.12)
@@ -735,6 +743,7 @@ DEPENDENCIES
735743
rswag-api
736744
rswag-specs
737745
rswag-ui
746+
ruby-openai
738747
selenium-webdriver
739748
sidekiq
740749
sidekiq-scheduler

Diff for: app/jobs/document_analyser_job.rb

+7
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
# Background job that runs the document analyser service for one document.
class DocumentAnalyserJob < ApplicationJob
  queue_as :high_priority

  # Builds a DocumentAnalyserService for the given document and invokes it.
  #
  # @param document the record handed to DocumentAnalyserService.new
  def perform(document)
    analyser = DocumentAnalyserService.new(document)
    analyser.call
  end
end

Diff for: app/models/document.rb

+7
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,8 @@ class NotArchiveableError < StandardError; end
5656
before_update :reset_replacement_document_validation_request_update_counter!, if: :owner_is_validation_request?
5757
after_update :audit_updated!
5858

59+
# Enqueue the analysis only after the INSERT has been committed: a job enqueued
# from inside the transaction can be picked up by a worker before the row is
# visible (or after a rollback), so it would fail to load the record.
after_create_commit :generate_ai_summary
60+
5961
DRAWING_TAGS = %w[
6062
elevations.existing
6163
elevations.proposed
@@ -284,6 +286,11 @@ def tags(key)
284286
end
285287
end
286288

289+
# Enqueues a DocumentAnalyserJob for this record, so the analysis runs in the
# background via the job rather than inline in the callback.
def generate_ai_summary
  DocumentAnalyserJob.perform_later(self)
end
293+
287294
# Filename of the attached file as a String, or nil when no file is attached.
def name
  return unless file.attached?

  file.filename.to_s
end

Diff for: app/services/document_analyser_service.rb

+91
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
require "openai"
2+
require "base64"
3+
4+
# Asks the OpenAI chat API for a short summary of a document's attached file
# and stores the answer in the document's +ai_summary+ attribute.
#
# The attachment is sent inline as a base64 data URL. The whole operation is
# best-effort: any error is logged and swallowed, never raised to the caller.
class DocumentAnalyserService
  # Name of the environment variable that holds the OpenAI API key. The key
  # must never be committed to the repository.
  ACCESS_TOKEN_ENV_KEY = "OPENAI_ACCESS_TOKEN"
  # Seconds to wait for the OpenAI API before giving up.
  REQUEST_TIMEOUT = 120
  MODEL = "gpt-4o-mini"
  # Used in the data URL only when the blob does not report a content type.
  FALLBACK_CONTENT_TYPE = "image/jpeg"
  SYSTEM_PROMPT = "You are an expert, with experience in UK housing/planning, in interpreting and extracting a concise summary/description of a document".freeze
  USER_PROMPT = "Analyse this image/document and provide a short description/summary of its content in no more than 2 sentences.".freeze

  attr_reader :document

  # @param document the record whose attached +file+ is analysed; it must
  #   respond to +file+, +id+ and +update!+
  def initialize(document)
    @document = document
  end

  # Lazily built API client. Building it on first use (instead of in
  # +initialize+) means a missing API key is handled by the rescue in +call+
  # like any other analysis failure.
  #
  # @return [OpenAI::Client]
  # @raise [KeyError] when the access token environment variable is not set
  def client
    @client ||= OpenAI::Client.new(
      access_token: ENV.fetch(ACCESS_TOKEN_ENV_KEY),
      request_timeout: REQUEST_TIMEOUT
    )
  end

  # Downloads the attachment, requests a summary and saves it.
  #
  # @return [true] when a summary was stored
  # @return [nil] when there is no attachment, the download failed, or the
  #   API returned no summary
  # @return [false] when any other error occurred (it is logged, not raised)
  def call
    return unless document.file.attached?

    file_data = download_file
    return unless file_data

    ai_summary = generate_ai_summary(user_message(Base64.strict_encode64(file_data)))

    document.update!(ai_summary: ai_summary) if ai_summary.present?
  rescue StandardError => e
    Rails.logger.error("Error analysing the document with id: #{document.id}: #{e.message}")
    false
  end

  private

  # Reads the attachment straight into memory. No temporary file is created,
  # so nothing is left behind on disk and there is no Tempfile that could be
  # garbage-collected (and deleted) before its contents are read.
  #
  # @return [String, nil] the file's bytes, or nil when the download failed
  def download_file
    document.file.blob.download
  rescue StandardError => e
    Rails.logger.error "Failed to download file: #{e.message}"
    nil
  end

  # Sends the prompt to the chat completions endpoint.
  #
  # @param user_content [Array<Hash>] content parts of the user message
  # @return [String, nil] the first choice's message content, if any
  def generate_ai_summary(user_content)
    response = client.chat(
      parameters: {
        model: MODEL,
        messages: [
          { role: "system", content: SYSTEM_PROMPT },
          { role: "user", content: user_content }
        ]
      }
    )

    response.dig("choices", 0, "message", "content")
  end

  # Builds the user message: the instruction text plus the file as a data URL.
  # The data URL is labelled with the blob's own content type rather than
  # always claiming JPEG.
  #
  # @param base64_file [String] strict base64 encoding of the file's bytes
  # @return [Array<Hash>]
  def user_message(base64_file)
    content_type = document.file.blob.content_type.presence || FALLBACK_CONTENT_TYPE

    [
      { type: "text", text: USER_PROMPT },
      { type: "image_url", image_url: { url: "data:#{content_type};base64,#{base64_file}" } }
    ]
  end
end
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
# Adds a text column, +ai_summary+, to the documents table.
class AddAiSummaryToDocuments < ActiveRecord::Migration[7.2]
  def change
    add_column(:documents, :ai_summary, :text)
  end
end

Diff for: db/schema.rb

+1
Original file line numberDiff line numberDiff line change
@@ -421,6 +421,7 @@
421421
t.bigint "owner_id"
422422
t.string "tags", default: [], array: true
423423
t.bigint "document_checklist_items_id"
424+
t.text "ai_summary"
424425
t.index ["api_user_id"], name: "ix_documents_on_api_user_id"
425426
t.index ["document_checklist_items_id"], name: "ix_documents_on_document_checklist_items_id"
426427
t.index ["evidence_group_id"], name: "ix_documents_on_evidence_group_id"

0 commit comments

Comments
 (0)