Skip to content
This repository was archived by the owner on Oct 27, 2025. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
99 changes: 99 additions & 0 deletions app/models/whitehall_migration/document_export.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
# Serialises live documents (content, history, images and file attachments)
# into plain Ruby hashes so they can be exported, e.g. as JSON.
class WhitehallMigration::DocumentExport
  # Returns every document that has a live edition whose state is not
  # "removed".
  #
  # The filtering is done in Ruby, so all documents are loaded. The result is
  # memoised on the class: later calls in the same process return the same
  # array without querying again.
  #
  # @return [Array<Document>]
  def self.exportable_documents
    @exportable_documents ||= Document
      .includes(:live_edition)
      .select { |document| document.live_edition && document.live_edition.state != "removed" }
  end

  # Builds the export hash for a single document from its live edition.
  #
  # @param document [Document] a document with a live edition
  # @return [Hash] symbol-keyed export data; `created_by` and `last_edited_by`
  #   are nil when the corresponding user cannot be found
  def self.export_to_hash(document)
    edition = document.live_edition
    revision = edition.revision
    content_revision = revision.content_revision

    {
      content_id: document[:content_id],
      state: edition.state,
      created_at: document[:created_at],
      first_published_at: PublishingApiPayload::History.new(edition).first_published_at,
      updated_at: document[:updated_at],
      created_by: user_email(document.created_by_id),
      last_edited_by: user_email(revision.created_by_id),
      document_type: revision.metadata_revision.document_type_id,
      title: content_revision.title,
      base_path: content_revision.base_path,
      summary: content_revision.summary,
      body: content_revision.contents["body"],
      tags: revision.tags_revision.tags,
      political: edition.political?,
      government_id: edition.government_id,
      change_notes: change_notes(document),
      internal_history: internal_history(document),
      images: export_images(document),
      attachments: export_attachments(document),
    }
  end

  # Returns the change history of the document's live edition, as produced by
  # PublishingApiPayload::History.
  def self.change_notes(document)
    PublishingApiPayload::History.new(document.live_edition).change_history
  end

  # Returns one hash per timeline entry of the document, newest first.
  #
  # `entry_content` is the note body for internal notes, the public
  # explanation for withdrawal entries, and nil for every other entry type.
  #
  # @return [Array<Hash>]
  def self.internal_history(document)
    timeline_entries = TimelineEntry
      .where(document:)
      .includes(:created_by, :details, :edition)
      .order(created_at: :desc)

    timeline_entries.map do |entry|
      entry_content = if entry.internal_note? && entry.details
                        entry.details.body
                      elsif (entry.withdrawn? || entry.withdrawn_updated?) && entry.details
                        entry.details.public_explanation
                      end

      {
        # NOTE(review): safe navigation assumes the edition and creator
        # associations may be absent on some entries - confirm against the
        # TimelineEntry model. Without it one such entry aborts the export.
        edition_number: entry.edition&.number,
        entry_type: entry.entry_type,
        date: entry.created_at.to_fs(:date),
        time: entry.created_at.to_fs(:time),
        user: entry.created_by&.email,
        entry_content:,
      }
    end
  end

  # Returns one hash per image revision on the live edition's revision,
  # including the URL of every asset variant. `lead_image` is true for the
  # image revision that is the revision's lead image.
  #
  # @return [Array<Hash>]
  def self.export_images(document)
    revision = document.live_edition.revision
    lead_image_revision = revision.lead_image_revision

    revision.image_revisions.map do |image_revision|
      variants = image_revision.blob_revision.assets.map do |asset|
        { variant: asset.variant, file_url: asset.file_url }
      end

      {
        created_at: image_revision.created_at,
        caption: image_revision.caption,
        alt_text: image_revision.alt_text,
        credit: image_revision.credit,
        lead_image: image_revision == lead_image_revision,
        variants:,
      }
    end
  end

  # Returns one hash per file attachment revision on the live edition's
  # revision.
  #
  # @return [Array<Hash>]
  def self.export_attachments(document)
    revision = document.live_edition.revision

    revision.file_attachment_revisions.map do |file_attachment_revision|
      {
        file_url: file_attachment_revision.asset.file_url,
        title: file_attachment_revision.metadata_revision.title,
        created_at: file_attachment_revision.created_at,
      }
    end
  end

  # Looks up a user's email address by ID. Returns nil rather than raising
  # when the ID is nil or no such user exists, so that a single missing user
  # does not abort the export.
  def self.user_email(user_id)
    User.find_by(id: user_id)&.email
  end
  private_class_method :user_email
end
32 changes: 32 additions & 0 deletions lib/tasks/export.rake
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
require "json"

namespace :export do
  desc "Export a specific live document and its assets, by its content ID"
  task :live_document_and_assets, %i[content_id output_file] => :environment do |_, args|
    # find_by! raises ActiveRecord::RecordNotFound for an unknown content ID.
    # Plain find_by would hand nil to the exporter, which then fails with an
    # unhelpful NoMethodError.
    document = Document.find_by!(content_id: args[:content_id])
    hash = WhitehallMigration::DocumentExport.export_to_hash(document)

    # Write pretty-printed JSON to the given file, or pretty-print the Ruby
    # hash to STDOUT when no file is given.
    if args[:output_file]
      File.write(args[:output_file], JSON.pretty_generate(hash))
    else
      pp hash
    end
  end

  desc "Export all live documents and assets"
  task :live_documents_and_assets, %i[output_directory] => :environment do |_, args|
    documents = WhitehallMigration::DocumentExport.exportable_documents

    puts "Exporting #{documents.size} live editions"

    documents.each do |document|
      hash = WhitehallMigration::DocumentExport.export_to_hash(document)

      if args[:output_directory]
        # One JSON file per document, named after the last segment of its
        # base path.
        # NOTE(review): documents whose base paths end in the same segment
        # write to the same file, so the later one overwrites the earlier.
        file_name = "#{hash[:base_path].split('/').last}.json"
        File.write(File.join(args[:output_directory], file_name), JSON.pretty_generate(hash))
      else
        pp hash
      end
    end
  end
end
1 change: 1 addition & 0 deletions lib/versioning/revision_updater.rb
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
module Versioning
class RevisionUpdater < BaseUpdater
require_relative "./revision_updater/image"
include RevisionUpdater::Image
include RevisionUpdater::FileAttachment

Expand Down
8 changes: 8 additions & 0 deletions spec/factories/edition_factory.rb
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,14 @@
end
end

# Builds on the `published` trait, overriding the transient `state` so it is
# "published_but_needs_2i".
trait :published_but_needs_2i do
  published

  transient { state { "published_but_needs_2i" } }
end

trait :withdrawn do
summary { SecureRandom.alphanumeric(10) }
live { true }
Expand Down
84 changes: 84 additions & 0 deletions spec/lib/tasks/export_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
# Specs for the export rake tasks. The exporter itself is stubbed throughout,
# so these examples only cover how the tasks look up documents and where they
# write the result.
RSpec.describe "Export tasks" do
  include ActiveJob::TestHelper

  describe "export:live_document_and_assets" do
    before do
      # Rake only runs a task once per process unless it is re-enabled.
      Rake::Task["export:live_document_and_assets"].reenable
    end

    it "calls WhitehallMigration::DocumentExport.export_to_hash with correct arguments" do
      document = create(:document, :with_live_edition)
      allow(WhitehallMigration::DocumentExport).to receive(:export_to_hash)
      Rake::Task["export:live_document_and_assets"].invoke(document.content_id)
      expect(WhitehallMigration::DocumentExport).to have_received(:export_to_hash).with(document)
    end

    it "pretty-prints the result to STDOUT if no output_file is specified" do
      document = create(:document, :with_live_edition)
      allow(WhitehallMigration::DocumentExport).to receive(:export_to_hash).and_return({ foo: "bar" })
      expect { Rake::Task["export:live_document_and_assets"].invoke(document.content_id) }.to output("{:foo=>\"bar\"}\n").to_stdout
    end

    it "writes the result as JSON to the given output_file if specified" do
      document = create(:document, :with_live_edition)
      allow(WhitehallMigration::DocumentExport).to receive(:export_to_hash).and_return({ foo: "bar", baz: "qux" })

      expected = <<~JSON
        {
          "foo": "bar",
          "baz": "qux"
        }
      JSON

      # The block form removes the temporary file once the example finishes.
      Tempfile.create("export") do |output_file|
        Rake::Task["export:live_document_and_assets"].invoke(document.content_id, output_file.path)

        # Exact comparison: `match` would treat the expected JSON as a regex.
        expect(File.read(output_file.path)).to eq(expected.strip)
      end
    end
  end

  describe "export:live_documents_and_assets" do
    before do
      allow($stdout).to receive(:puts) # suppress output for cleanliness
      Rake::Task["export:live_documents_and_assets"].reenable
      Document.find_each(&:destroy) # Clean slate
      allow(WhitehallMigration::DocumentExport).to receive(:exportable_documents).and_return(documents)
    end

    let(:documents) do
      [
        create(:document, :with_live_edition),
        create(:document, :with_live_edition),
        create(:document, :with_live_edition),
      ]
    end

    it "lists how many documents it is about to export" do
      expect { Rake::Task["export:live_documents_and_assets"].invoke }.to output(/^Exporting 3 live editions/).to_stdout
    end

    it "calls WhitehallMigration::DocumentExport.export_to_hash with correct arguments" do
      allow(WhitehallMigration::DocumentExport).to receive(:export_to_hash)
      Rake::Task["export:live_documents_and_assets"].invoke
      expect(WhitehallMigration::DocumentExport).to have_received(:export_to_hash).with(documents[0])
      expect(WhitehallMigration::DocumentExport).to have_received(:export_to_hash).with(documents[1])
      expect(WhitehallMigration::DocumentExport).to have_received(:export_to_hash).with(documents[2])
    end

    it "pretty-prints the result to STDOUT if no output_directory is specified" do
      allow(WhitehallMigration::DocumentExport).to receive(:export_to_hash).and_return({ foo: "bar" })
      expect { Rake::Task["export:live_documents_and_assets"].invoke }.to output(/{:foo=>"bar"}\n{:foo=>"bar"}\n{:foo=>"bar"}\n$/).to_stdout
    end

    it "writes the result as JSON files to the given output_directory if specified" do
      allow(WhitehallMigration::DocumentExport).to receive(:export_to_hash) do |document|
        { base_path: "/news/example-path-#{document.id}" }
      end

      # The block form removes the temporary directory and its contents once
      # the example finishes.
      Dir.mktmpdir do |output_directory|
        Rake::Task["export:live_documents_and_assets"].invoke(output_directory)

        expected_files = documents.map { |doc| "#{output_directory}/example-path-#{doc.id}.json" }
        actual_files = Dir.glob("#{output_directory}/*.json")
        expect(actual_files).to match_array(expected_files)
      end
    end
  end
end
Loading