Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,6 @@
/docs/_data/unstable
/docs/.jekyll-metadata
node_modules

# Ignore exports folder
/exports
Comment on lines +6 to +8
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We don't need to add an exports folder; the user can choose to export the CSV wherever they want. Let's remove this.

1 change: 1 addition & 0 deletions dev/lib/product_taxonomy.rb
Original file line number Diff line number Diff line change
Expand Up @@ -60,3 +60,4 @@ def data_path = DATA_PATH
require_relative "product_taxonomy/commands/add_attribute_command"
require_relative "product_taxonomy/commands/add_attributes_to_categories_command"
require_relative "product_taxonomy/commands/add_value_command"
require_relative "product_taxonomy/commands/compare_categories_command"
6 changes: 6 additions & 0 deletions dev/lib/product_taxonomy/cli.rb
Original file line number Diff line number Diff line change
Expand Up @@ -95,5 +95,11 @@ def add_attributes_to_categories(attribute_friendly_ids, category_ids)
# Runs AddValueCommand with the CLI options extended by the given
# name and attribute_friendly_id.
def add_value(name, attribute_friendly_id)
  command_options = options.merge(name: name, attribute_friendly_id: attribute_friendly_id)
  AddValueCommand.new(command_options).run
end

desc "compare_categories VERSION_FOLDER", "Compare category changes between full_names.yml and categories.txt"
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we can make the description clearer, maybe something like this:

Suggested change
desc "compare_categories VERSION_FOLDER", "Compare category changes between full_names.yml and categories.txt"
desc "compare_categories VERSION_FOLDER", "Generate a CSV report showing category changes between the current taxonomy and Shopify taxonomy VERSION"

option :output_dir, type: :string, default: "exports", desc: "Output directory for CSV file"
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It would be more flexible and idiomatic to accept the full output path of the CSV file (filename included), not just the output directory.

def compare_categories(version_folder)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I find version_folder confusing here. Isn't it just the version?

Suggested change
def compare_categories(version_folder)
def compare_categories(version)

CompareCategoriesCommand.new(options).run(version_folder)
end
end
end
165 changes: 165 additions & 0 deletions dev/lib/product_taxonomy/commands/compare_categories_command.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,165 @@
# frozen_string_literal: true

require "csv"
require "yaml"
require "fileutils"

module ProductTaxonomy
class CompareCategoriesCommand < Command
# Compares the category names recorded for a Shopify integration version
# (full_names.yml) with the current dist/en/categories.txt, writes the
# differences to a CSV file and logs a summary.
#
# The CSV file is written on every run, including when no changes are found.
#
# @param version_folder [String] folder name under integrations/shopify/
#   that contains full_names.yml
# @raise [ArgumentError] if full_names.yml or categories.txt cannot be found
def execute(version_folder)
  validate_version_folder!(version_folder)

  logger.info("Loading full_names from version: #{version_folder}")
  full_names = load_full_names(version_folder)
  logger.info("Loaded #{full_names.size} categories from full_names.yml")

  logger.info("Loading categories from dist/en/categories.txt")
  categories = load_categories
  logger.info("Loaded #{categories.size} categories from categories.txt")

  logger.info("Comparing categories...")
  changes = compare_categories(full_names, categories)

  # A relative output_dir is resolved against the taxonomy data path.
  output_dir = File.expand_path(options[:output_dir] || "exports", ProductTaxonomy.data_path)
  FileUtils.mkdir_p(output_dir)

  output_path = write_csv_report(changes, version_folder, output_dir)

  logger.info("")
  logger.info("Comparison complete!")
  logger.info("Total changes detected: #{changes.size}")

  if changes.any?
    # Explicit labels: blindly appending "s" to the capitalized type
    # produced "Archiveds" for :archived.
    summary_labels = { rename: "Renames", archived: "Archived", addition: "Additions" }

    changes.group_by { |change| change[:type] }.each do |change_type, changes_of_type|
      label = summary_labels.fetch(change_type) { "#{change_type.to_s.capitalize}s" }
      logger.info(" #{label}: #{changes_of_type.size}")
    end

    logger.info("")
    logger.info("Detailed report saved to: #{output_path}")
  else
    logger.info("No changes detected between the two files.")
  end
end

private

# Ensures the given version folder contains a full_names.yml file.
#
# @param version_folder [String] folder name under integrations/shopify/
# @raise [ArgumentError] if full_names.yml does not exist for that version
def validate_version_folder!(version_folder)
  relative_path = "integrations/shopify/#{version_folder}/full_names.yml"
  return if File.exist?(File.expand_path(relative_path, ProductTaxonomy.data_path))

  raise ArgumentError, "full_names.yml not found in #{version_folder}"
end

# Reads the version's full_names.yml and indexes its entries by id.
#
# @param version_folder [String] folder name under integrations/shopify/
# @return [Hash] each entry's "id" mapped to its "full_name"
def load_full_names(version_folder)
  yaml_path = File.expand_path(
    "integrations/shopify/#{version_folder}/full_names.yml",
    ProductTaxonomy.data_path
  )

  entries = YAML.safe_load_file(yaml_path)

  # Build [id, full_name] pairs first, then turn them into a lookup hash.
  entries.map { |entry| [entry["id"], entry["full_name"]] }.to_h
end

# Parses dist/en/categories.txt (resolved one level above the data path)
# into a lookup of category id to full name.
#
# Blank lines, lines starting with "#", lines without a " : " separator and
# lines whose left-hand side does not start with the TaxonomyCategory GID
# prefix are skipped.
#
# @return [Hash] category id => full name
# @raise [ArgumentError] if categories.txt does not exist
def load_categories
  source_path = File.expand_path("../dist/en/categories.txt", ProductTaxonomy.data_path)
  raise ArgumentError, "categories.txt not found in dist/en/" unless File.exist?(source_path)

  gid_prefix = "gid://shopify/TaxonomyCategory/"

  File.foreach(source_path).each_with_object({}) do |raw_line, names_by_id|
    entry = raw_line.strip
    next if entry.empty? || entry.start_with?("#")
    next unless entry.include?(" : ")

    # Expected format: gid://shopify/TaxonomyCategory/{id} : {full_name}
    gid, name = entry.split(" : ", 2)
    next unless gid.start_with?(gid_prefix)

    names_by_id[gid.gsub(gid_prefix, "").strip] = name.strip
  end
end

# Diffs two id => name lookups and describes every difference.
#
# Ids are visited in sorted order. For each id:
# - present in both with different names -> :rename
# - present only in full_names           -> :archived (new_name is "")
# - present only in categories           -> :addition (old_name is "")
#
# @param full_names [Hash] id => name from the version's full_names.yml
# @param categories [Hash] id => name from categories.txt
# @return [Array<Hash>] hashes with :type, :id, :old_name and :new_name
def compare_categories(full_names, categories)
  sorted_ids = (full_names.keys | categories.keys).sort

  sorted_ids.filter_map do |id|
    was_present = full_names.key?(id)
    is_present = categories.key?(id)

    if was_present && is_present
      # Same id on both sides: only a differing name counts as a change.
      next if full_names[id] == categories[id]

      { type: :rename, id: id, old_name: full_names[id], new_name: categories[id] }
    elsif was_present
      { type: :archived, id: id, old_name: full_names[id], new_name: "" }
    else
      # Every id comes from one of the two hashes, so it must be in categories.
      { type: :addition, id: id, old_name: "", new_name: categories[id] }
    end
  end
end

# Writes the detected changes to a timestamped CSV file in output_dir.
#
# The file is named category_changes_{version_folder}_{YYYYmmdd_HHMMSS}.csv
# and starts with a "type,id,old_name,new_name" header row.
#
# @param changes [Array<Hash>] hashes with :type, :id, :old_name, :new_name
# @param version_folder [String] version name embedded in the filename
# @param output_dir [String] directory the file is created in
# @return [String] path of the written CSV file
def write_csv_report(changes, version_folder, output_dir)
  stamp = Time.now.strftime("%Y%m%d_%H%M%S")
  report_path = File.join(output_dir, "category_changes_#{version_folder}_#{stamp}.csv")
  columns = %i[type id old_name new_name]

  CSV.open(report_path, "w", encoding: "utf-8") do |report|
    report << columns.map(&:to_s)
    changes.each { |change| report << change.values_at(*columns) }
  end

  report_path
end
end
end