mirror of
https://github.com/discourse/discourse.git
synced 2025-06-06 23:07:28 +08:00
DEV: Gracefully handle regex_replace
max column length violations (#29787)
* DEV: Gracefully handle `regex_replace` violations of column length constraints
This is a follow-up to the `remap` [refactor](9b0cfa99c5
).
Similar to `remap`, the entire `regex_replace` operation fails if the new content exceeds the column’s max length.
This change introduces an optional mode, controlled by the new `skip_max_length_violations` param
to skip records eligible for `regex_replace` where the new content violates the max column length constraint.
It also includes updates to the exception message raised when `regex_replace` fails to include more details
* DEV: Remove string escapes in heredoc text
This commit is contained in:
107
lib/db_helper.rb
107
lib/db_helper.rb
@ -33,27 +33,35 @@ class DbHelper
|
|||||||
|
|
||||||
return if text_columns.empty?
|
return if text_columns.empty?
|
||||||
|
|
||||||
pattern = "#{anchor_left ? "" : "%"}#{from}#{anchor_right ? "" : "%"}"
|
transforms = {
|
||||||
|
replacement: ->(column_name) { %|REPLACE("#{column_name}", :from, :to)| },
|
||||||
|
condition: ->(column_name) do
|
||||||
|
%|"#{column_name}" IS NOT NULL AND "#{column_name}" LIKE :pattern|
|
||||||
|
end,
|
||||||
|
}
|
||||||
|
|
||||||
|
query_params = {
|
||||||
|
from: from,
|
||||||
|
to: to,
|
||||||
|
pattern: "#{anchor_left ? "" : "%"}#{from}#{anchor_right ? "" : "%"}",
|
||||||
|
}
|
||||||
|
|
||||||
text_columns.each do |table, columns|
|
text_columns.each do |table, columns|
|
||||||
query_parts = build_remap_query_parts(table, columns, skip_max_length_violations)
|
query_parts =
|
||||||
|
build_transform_query_parts(table, columns, skip_max_length_violations, transforms)
|
||||||
|
|
||||||
begin
|
begin
|
||||||
rows_updated = DB.exec(<<~SQL, from: from, to: to, pattern: pattern)
|
rows_updated = execute_transform(table, query_parts, query_params)
|
||||||
UPDATE \"#{table}\"
|
|
||||||
SET #{query_parts[:updates].join(", ")}
|
|
||||||
WHERE #{query_parts[:conditions].join(" OR ")}
|
|
||||||
SQL
|
|
||||||
rescue PG::StringDataRightTruncation => e
|
rescue PG::StringDataRightTruncation => e
|
||||||
# Provide more context in the exeption message
|
# Provide more context in the exeption message
|
||||||
raise_contextualized_remap_exception(e, table, query_parts[:length_constrained_columns])
|
raise_contextualized_transform_exception(e, table, query_parts[:length_constrained_columns])
|
||||||
end
|
end
|
||||||
|
|
||||||
if verbose
|
if verbose
|
||||||
skipped_counts =
|
skipped_counts =
|
||||||
skipped_remap_counts(table, from, to, pattern, query_parts, skip_max_length_violations)
|
skipped_transform_counts(table, query_parts, skip_max_length_violations, query_params)
|
||||||
|
|
||||||
log_remap_message(table, rows_updated, skipped_counts)
|
log_transform_result(table, rows_updated, skipped_counts)
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
@ -66,34 +74,40 @@ class DbHelper
|
|||||||
flags: "gi",
|
flags: "gi",
|
||||||
match: "~*",
|
match: "~*",
|
||||||
excluded_tables: [],
|
excluded_tables: [],
|
||||||
verbose: false
|
verbose: false,
|
||||||
|
skip_max_length_violations: false
|
||||||
)
|
)
|
||||||
text_columns = find_text_columns(excluded_tables)
|
text_columns = find_text_columns(excluded_tables)
|
||||||
|
|
||||||
|
return if text_columns.empty?
|
||||||
|
|
||||||
|
transforms = {
|
||||||
|
replacement: ->(column_name) do
|
||||||
|
%|REGEXP_REPLACE("#{column_name}", :pattern, :replacement, :flags)|
|
||||||
|
end,
|
||||||
|
condition: ->(column_name) do
|
||||||
|
%|"#{column_name}" IS NOT NULL AND "#{column_name}" #{match} :pattern|
|
||||||
|
end,
|
||||||
|
}
|
||||||
|
|
||||||
|
query_params = { pattern: pattern, replacement: replacement, flags: flags }
|
||||||
|
|
||||||
text_columns.each do |table, columns|
|
text_columns.each do |table, columns|
|
||||||
set =
|
query_parts =
|
||||||
columns
|
build_transform_query_parts(table, columns, skip_max_length_violations, transforms)
|
||||||
.map do |column|
|
|
||||||
replace = "REGEXP_REPLACE(\"#{column[:name]}\", :pattern, :replacement, :flags)"
|
begin
|
||||||
replace = truncate(replace, table, column)
|
rows_updated = execute_transform(table, query_parts, query_params)
|
||||||
"\"#{column[:name]}\" = #{replace}"
|
rescue PG::StringDataRightTruncation => e
|
||||||
|
# Provide more context in the exeption message
|
||||||
|
raise_contextualized_transform_exception(e, table, query_parts[:length_constrained_columns])
|
||||||
end
|
end
|
||||||
.join(", ")
|
|
||||||
|
|
||||||
where =
|
if verbose
|
||||||
columns
|
skipped_counts =
|
||||||
.map do |column|
|
skipped_transform_counts(table, query_parts, skip_max_length_violations, query_params)
|
||||||
"\"#{column[:name]}\" IS NOT NULL AND \"#{column[:name]}\" #{match} :pattern"
|
log_transform_result(table, rows_updated, skipped_counts)
|
||||||
end
|
end
|
||||||
.join(" OR ")
|
|
||||||
|
|
||||||
rows = DB.exec(<<~SQL, pattern: pattern, replacement: replacement, flags: flags, match: match)
|
|
||||||
UPDATE \"#{table}\"
|
|
||||||
SET #{set}
|
|
||||||
WHERE #{where}
|
|
||||||
SQL
|
|
||||||
|
|
||||||
puts "#{table}=#{rows}" if verbose && rows > 0
|
|
||||||
end
|
end
|
||||||
|
|
||||||
finish!
|
finish!
|
||||||
@ -164,11 +178,11 @@ class DbHelper
|
|||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
def self.build_remap_query_parts(table, columns, skip_max_length_violations)
|
def self.build_transform_query_parts(table, columns, skip_max_length_violations, transforms)
|
||||||
columns.each_with_object(
|
columns.each_with_object(
|
||||||
{ updates: [], conditions: [], skipped_sums: [], length_constrained_columns: [] },
|
{ updates: [], conditions: [], skipped_sums: [], length_constrained_columns: [] },
|
||||||
) do |column, parts|
|
) do |column, parts|
|
||||||
replace = %|REPLACE("#{column[:name]}", :from, :to)|
|
replace = transforms[:replacement].call(column[:name])
|
||||||
replace = truncate(replace, table, column)
|
replace = truncate(replace, table, column)
|
||||||
|
|
||||||
if column[:max_length].present?
|
if column[:max_length].present?
|
||||||
@ -177,10 +191,10 @@ class DbHelper
|
|||||||
end
|
end
|
||||||
|
|
||||||
# Build SQL update statements for each column
|
# Build SQL update statements for each column
|
||||||
parts[:updates] << %("#{column[:name]}" = #{replace})
|
parts[:updates] << %|"#{column[:name]}" = #{replace}|
|
||||||
|
|
||||||
# Build the base SQL condition clause for each column
|
# Build the base SQL condition clause for each column
|
||||||
basic_condition = %("#{column[:name]}" IS NOT NULL AND "#{column[:name]}" LIKE :pattern)
|
basic_condition = transforms[:condition].call(column[:name])
|
||||||
|
|
||||||
if skip_max_length_violations && column[:max_length].present?
|
if skip_max_length_violations && column[:max_length].present?
|
||||||
# Extend base condition to skip updates that would violate the column length constraint
|
# Extend base condition to skip updates that would violate the column length constraint
|
||||||
@ -204,7 +218,18 @@ class DbHelper
|
|||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
def self.log_remap_message(table, rows_updated, skipped_counts)
|
def self.skipped_transform_counts(table, query_parts, skip_max_length_violations, params)
|
||||||
|
return unless skip_max_length_violations && query_parts[:skipped_sums].any?
|
||||||
|
|
||||||
|
skipped = DB.query_hash(<<~SQL, params).first
|
||||||
|
SELECT #{query_parts[:skipped_sums].join(", ")}
|
||||||
|
FROM "#{table}"
|
||||||
|
SQL
|
||||||
|
|
||||||
|
skipped.select { |_, count| count.to_i > 0 }
|
||||||
|
end
|
||||||
|
|
||||||
|
def self.log_transform_result(table, rows_updated, skipped_counts)
|
||||||
return if rows_updated == 0 && skipped_counts.blank?
|
return if rows_updated == 0 && skipped_counts.blank?
|
||||||
|
|
||||||
message = +"#{table}=#{rows_updated}"
|
message = +"#{table}=#{rows_updated}"
|
||||||
@ -221,6 +246,14 @@ class DbHelper
|
|||||||
puts message
|
puts message
|
||||||
end
|
end
|
||||||
|
|
||||||
|
def self.execute_transform(table, query_parts, params)
|
||||||
|
DB.exec(<<~SQL, params)
|
||||||
|
UPDATE "#{table}"
|
||||||
|
SET #{query_parts[:updates].join(", ")}
|
||||||
|
WHERE #{query_parts[:conditions].join(" OR ")}
|
||||||
|
SQL
|
||||||
|
end
|
||||||
|
|
||||||
def self.skipped_remap_counts(table, from, to, pattern, query_parts, skip_max_length_violations)
|
def self.skipped_remap_counts(table, from, to, pattern, query_parts, skip_max_length_violations)
|
||||||
return unless skip_max_length_violations && query_parts[:skipped_sums].any?
|
return unless skip_max_length_violations && query_parts[:skipped_sums].any?
|
||||||
|
|
||||||
@ -232,7 +265,7 @@ class DbHelper
|
|||||||
skipped.select { |_, count| count.to_i > 0 }
|
skipped.select { |_, count| count.to_i > 0 }
|
||||||
end
|
end
|
||||||
|
|
||||||
def self.raise_contextualized_remap_exception(error, table, columns)
|
def self.raise_contextualized_transform_exception(error, table, columns)
|
||||||
details = "columns with length constraints: #{columns.join(", ")}"
|
details = "columns with length constraints: #{columns.join(", ")}"
|
||||||
|
|
||||||
raise PG::StringDataRightTruncation, " #{error.message.strip} (table: #{table}, #{details})"
|
raise PG::StringDataRightTruncation, " #{error.message.strip} (table: #{table}, #{details})"
|
||||||
|
@ -1,12 +1,12 @@
|
|||||||
# frozen_string_literal: true
|
# frozen_string_literal: true
|
||||||
|
|
||||||
RSpec.describe DbHelper do
|
RSpec.describe DbHelper do
|
||||||
describe ".remap" do
|
|
||||||
fab!(:bookmark1) { Fabricate(:bookmark, name: "short-bookmark") }
|
fab!(:bookmark1) { Fabricate(:bookmark, name: "short-bookmark") }
|
||||||
fab!(:bookmark2) { Fabricate(:bookmark, name: "another-bookmark") }
|
fab!(:bookmark2) { Fabricate(:bookmark, name: "another-bookmark") }
|
||||||
let(:bookmark_name_limit) { Bookmark.columns_hash["name"].limit }
|
let(:bookmark_name_limit) { Bookmark.columns_hash["name"].limit }
|
||||||
let(:long_bookmark_name) { "a" * (bookmark_name_limit + 1) }
|
let(:long_bookmark_name) { "a" * (bookmark_name_limit + 1) }
|
||||||
|
|
||||||
|
describe ".remap" do
|
||||||
it "should remap columns properly" do
|
it "should remap columns properly" do
|
||||||
post = Fabricate(:post, cooked: "this is a specialcode that I included")
|
post = Fabricate(:post, cooked: "this is a specialcode that I included")
|
||||||
post_attributes = post.reload.attributes
|
post_attributes = post.reload.attributes
|
||||||
@ -97,5 +97,43 @@ RSpec.describe DbHelper do
|
|||||||
|
|
||||||
expect(post.reload.raw).to include("[img]something[/img]")
|
expect(post.reload.raw).to include("[img]something[/img]")
|
||||||
end
|
end
|
||||||
|
|
||||||
|
context "when skip_max_length_violations is false" do
|
||||||
|
it "raises an exception if regexp_replace exceeds column length constraint by default" do
|
||||||
|
expect { DbHelper.regexp_replace("bookmark", long_bookmark_name) }.to raise_error(
|
||||||
|
PG::StringDataRightTruncation,
|
||||||
|
/value too long.*table: bookmarks,.*name/,
|
||||||
|
)
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
context "when skip_max_length_violations is true" do
|
||||||
|
it "skips regexp_replace eligible rows if new value exceeds column length constraint" do
|
||||||
|
DbHelper.regexp_replace("bookmark", long_bookmark_name, skip_max_length_violations: true)
|
||||||
|
|
||||||
|
bookmark1.reload
|
||||||
|
bookmark2.reload
|
||||||
|
|
||||||
|
expect(bookmark1.name).to eq("short-bookmark")
|
||||||
|
expect(bookmark2.name).to eq("another-bookmark")
|
||||||
|
end
|
||||||
|
|
||||||
|
it "logs skipped regexp_replace due to max length constraints when verbose is true" do
|
||||||
|
expect {
|
||||||
|
DbHelper.regexp_replace(
|
||||||
|
"bookmark",
|
||||||
|
long_bookmark_name,
|
||||||
|
verbose: true,
|
||||||
|
skip_max_length_violations: true,
|
||||||
|
)
|
||||||
|
}.to output(/SKIPPED:/).to_stdout
|
||||||
|
|
||||||
|
bookmark1.reload
|
||||||
|
bookmark2.reload
|
||||||
|
|
||||||
|
expect(bookmark1.name).to eq("short-bookmark")
|
||||||
|
expect(bookmark2.name).to eq("another-bookmark")
|
||||||
|
end
|
||||||
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
Reference in New Issue
Block a user