Skip to content
This repository was archived by the owner on Oct 27, 2025. It is now read-only.

Commit 7a999c7

Browse files
authored
Merge pull request #3311 from alphagov/export-news-articles
Export documents and assets (WHIT-2419)
2 parents 04e799b + 50a075b commit 7a999c7

File tree

6 files changed

+586
-0
lines changed

6 files changed

+586
-0
lines changed
Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,99 @@
1+
class WhitehallMigration::DocumentExport
2+
# Returns every document that has a live edition whose state is not "removed".
#
# The filter runs in Ruby over the loaded records, so the result is an Array
# rather than a relation. The list is rebuilt on every call: caching it in a
# class-level instance variable would keep returning the first result for the
# lifetime of the process, even after the underlying documents change.
#
# @return [Array<Document>] documents eligible for export
def self.exportable_documents
  Document.includes(:live_edition).select do |document|
    live_edition = document.live_edition
    live_edition && live_edition.state != "removed"
  end
end
9+
10+
# Builds a plain Hash describing the live edition of +document+: identifiers,
# timestamps, authorship, content fields, tags, change notes, internal
# history, images and attachments.
#
# NOTE(review): assumes +document+ has a live edition and that both user IDs
# resolve to a User record; otherwise this raises. Confirm against callers.
#
# @param document [Document] the document to export
# @return [Hash] export payload keyed by symbol
def self.export_to_hash(document)
  edition = document.live_edition
  revision = edition.revision
  content = revision.content_revision

  {
    content_id: document[:content_id],
    state: edition.state,
    created_at: document[:created_at],
    first_published_at: PublishingApiPayload::History.new(edition).first_published_at,
    updated_at: document[:updated_at],
    created_by: User.find(document.created_by_id).email,
    last_edited_by: User.find(revision.created_by_id).email,
    document_type: revision.metadata_revision.document_type_id,
    title: content.title,
    base_path: content.base_path,
    summary: content.summary,
    body: content.contents["body"],
    tags: revision.tags_revision.tags,
    political: edition.political?,
    government_id: edition.government_id,
    change_notes: change_notes(document),
    internal_history: internal_history(document),
    images: export_images(document),
    attachments: export_attachments(document),
  }
end
35+
36+
# Returns the change history that PublishingApiPayload::History reports for
# the live edition of +document+.
#
# @param document [Document] the document whose live edition is inspected
def self.change_notes(document)
  history = PublishingApiPayload::History.new(document.live_edition)
  history.change_history
end
39+
40+
# Builds the internal timeline history for +document+, newest entry first.
#
# Each timeline entry becomes a Hash with the edition number, entry type,
# formatted date and time, the acting user's email and, for internal notes
# and withdrawals that have details, the associated text.
#
# The edition and user are read with safe navigation so that one entry
# lacking either association yields nil for that field instead of raising
# NoMethodError and aborting the whole export.
# NOTE(review): presumably some entries (e.g. system-generated ones) have no
# user or edition; confirm against the TimelineEntry model.
#
# @param document [Document] the document whose timeline is exported
# @return [Array<Hash>] one Hash per timeline entry
def self.internal_history(document)
  timeline_entries = TimelineEntry
    .where(document:)
    .includes(:created_by, :details, :edition)
    .order(created_at: :desc)

  timeline_entries.map do |entry|
    details = entry.details
    entry_content =
      if entry.internal_note? && details
        details.body
      elsif (entry.withdrawn? || entry.withdrawn_updated?) && details
        details.public_explanation
      end

    {
      edition_number: entry.edition&.number,
      entry_type: entry.entry_type,
      date: entry.created_at.to_fs(:date),
      time: entry.created_at.to_fs(:time),
      user: entry.created_by&.email,
      entry_content:,
    }
  end
end
63+
64+
# Describes every image revision on the live edition's revision of +document+.
#
# @param document [Document] the document whose images are exported
# @return [Array<Hash>] one Hash per image revision with its metadata, whether
#   it is the lead image, and the variant name and file URL of each asset
def self.export_images(document)
  live_revision = document.live_edition.revision
  lead = live_revision.lead_image_revision

  live_revision.image_revisions.map do |image|
    variants = image.blob_revision.assets.map do |asset|
      { variant: asset.variant, file_url: asset.file_url }
    end

    {
      created_at: image.created_at,
      caption: image.caption,
      alt_text: image.alt_text,
      credit: image.credit,
      lead_image: image == lead,
      variants:,
    }
  end
end
85+
86+
# Describes every file attachment revision on the live edition's revision of
# +document+.
#
# @param document [Document] the document whose attachments are exported
# @return [Array<Hash>] one Hash per attachment with its asset file URL,
#   metadata title and creation time
def self.export_attachments(document)
  attachments = document.live_edition.revision.file_attachment_revisions

  attachments.map do |attachment|
    {
      file_url: attachment.asset.file_url,
      title: attachment.metadata_revision.title,
      created_at: attachment.created_at,
    }
  end
end
99+
end

lib/tasks/export.rake

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
require "json"
2+
3+
namespace :export do
  # Writes +hash+ as pretty-printed JSON to +path+, or pretty-prints it to
  # STDOUT when no path is given. Shared by both tasks below.
  write_export = lambda do |hash, path|
    if path
      File.write(path, JSON.pretty_generate(hash))
    else
      pp hash
    end
  end

  desc "Export a specific live document and its assets, by its content ID"
  task :live_document_and_assets, %i[content_id output_file] => :environment do |_, args|
    # find_by! raises ActiveRecord::RecordNotFound for an unknown content ID
    # instead of handing nil to the exporter, which would fail with an opaque
    # NoMethodError.
    document = Document.find_by!(content_id: args[:content_id])
    raise "Document #{args[:content_id]} has no live edition to export" unless document.live_edition

    hash = WhitehallMigration::DocumentExport.export_to_hash(document)
    write_export.call(hash, args[:output_file])
  end

  desc "Export all live documents and assets"
  task :live_documents_and_assets, %i[output_directory] => :environment do |_, args|
    documents = WhitehallMigration::DocumentExport.exportable_documents
    output_directory = args[:output_directory]

    puts "Exporting #{documents.count} live editions"

    documents.each do |document|
      hash = WhitehallMigration::DocumentExport.export_to_hash(document)

      # Each file is named after the last segment of the document's base path.
      # NOTE(review): documents whose base paths share a final segment would
      # overwrite one another; confirm slugs are unique across the export.
      path = "#{output_directory}/#{hash[:base_path].split('/').last}.json" if output_directory
      write_export.call(hash, path)
    end
  end
end

lib/versioning/revision_updater.rb

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
module Versioning
22
class RevisionUpdater < BaseUpdater
3+
require_relative "./revision_updater/image"
34
include RevisionUpdater::Image
45
include RevisionUpdater::FileAttachment
56

spec/factories/edition_factory.rb

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,14 @@
106106
end
107107
end
108108

109+
# Applies the +published+ trait, then overrides the transient +state+ with
# "published_but_needs_2i".
trait :published_but_needs_2i do
  published

  transient { state { "published_but_needs_2i" } }
end
116+
109117
trait :withdrawn do
110118
summary { SecureRandom.alphanumeric(10) }
111119
live { true }

spec/lib/tasks/export_spec.rb

Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
RSpec.describe "Export tasks" do
2+
include ActiveJob::TestHelper
3+
4+
# Covers the single-document export task: delegation to DocumentExport,
# STDOUT output, and JSON file output.
describe "export:live_document_and_assets" do
  before do
    # Rake tasks run once per process unless re-enabled.
    Rake::Task["export:live_document_and_assets"].reenable
  end

  it "calls WhitehallMigration::DocumentExport.export_to_hash with correct arguments" do
    document = create(:document, :with_live_edition)
    allow(WhitehallMigration::DocumentExport).to receive(:export_to_hash)
    Rake::Task["export:live_document_and_assets"].invoke(document.content_id)
    expect(WhitehallMigration::DocumentExport).to have_received(:export_to_hash).with(document)
  end

  it "pretty-prints the result to STDOUT if no output_file is specified" do
    document = create(:document, :with_live_edition)
    allow(WhitehallMigration::DocumentExport).to receive(:export_to_hash).and_return({ foo: "bar" })
    expect { Rake::Task["export:live_document_and_assets"].invoke(document.content_id) }.to output("{:foo=>\"bar\"}\n").to_stdout
  end

  it "writes the result as JSON to the given output_file if specified" do
    document = create(:document, :with_live_edition)
    allow(WhitehallMigration::DocumentExport).to receive(:export_to_hash).and_return({ foo: "bar", baz: "qux" })

    expected = <<~JSON
      {
        "foo": "bar",
        "baz": "qux"
      }
    JSON

    # The block form closes and deletes the temporary file when the example
    # finishes, rather than leaving cleanup to garbage collection.
    Tempfile.create("export") do |output_file|
      Rake::Task["export:live_document_and_assets"].invoke(document.content_id, output_file.path)

      # eq compares the file contents literally; match would interpret the
      # expected JSON as a regular expression.
      expect(File.read(output_file.path)).to eq(expected.strip)
    end
  end
end
38+
39+
# Covers the bulk export task: the progress line, delegation to
# DocumentExport for each document, STDOUT output, and per-document JSON files.
describe "export:live_documents_and_assets" do
  before do
    allow($stdout).to receive(:puts) # suppress output for cleanliness
    Rake::Task["export:live_documents_and_assets"].reenable
    Document.find_each(&:destroy) # Clean slate
    allow(WhitehallMigration::DocumentExport).to receive(:exportable_documents).and_return(documents)
  end

  let(:documents) do
    [
      create(:document, :with_live_edition),
      create(:document, :with_live_edition),
      create(:document, :with_live_edition),
    ]
  end

  it "lists how many documents it is about to export" do
    expect { Rake::Task["export:live_documents_and_assets"].invoke }.to output(/^Exporting 3 live editions/).to_stdout
  end

  it "calls WhitehallMigration::DocumentExport.export_to_hash with correct arguments" do
    allow(WhitehallMigration::DocumentExport).to receive(:export_to_hash)
    Rake::Task["export:live_documents_and_assets"].invoke
    expect(WhitehallMigration::DocumentExport).to have_received(:export_to_hash).with(documents[0])
    expect(WhitehallMigration::DocumentExport).to have_received(:export_to_hash).with(documents[1])
    expect(WhitehallMigration::DocumentExport).to have_received(:export_to_hash).with(documents[2])
  end

  it "pretty-prints the result to STDOUT if no output_directory is specified" do
    allow(WhitehallMigration::DocumentExport).to receive(:export_to_hash).and_return({ foo: "bar" })
    expect { Rake::Task["export:live_documents_and_assets"].invoke }.to output(/{:foo=>"bar"}\n{:foo=>"bar"}\n{:foo=>"bar"}\n$/).to_stdout
  end

  it "writes the result as JSON files to the given output_directory if specified" do
    allow(WhitehallMigration::DocumentExport).to receive(:export_to_hash) do |document|
      { base_path: "/news/example-path-#{document.id}" }
    end

    # The block form removes the temporary directory and its files when the
    # example finishes; without a block nothing ever deletes it.
    Dir.mktmpdir do |output_directory|
      Rake::Task["export:live_documents_and_assets"].invoke(output_directory)

      expected_files = documents.map { |doc| "#{output_directory}/example-path-#{doc.id}.json" }
      # match_array ignores ordering, so the glob result needs no sorting.
      expect(Dir.glob("#{output_directory}/*.json")).to match_array(expected_files)
    end
  end
end
84+
end

0 commit comments

Comments
 (0)