DEV: Prepare new structure for migrations-tooling (#26631)

* Moves existing files around. All essential scripts are in `migrations/bin`, and non-essential scripts like benchmarks are in `migrations/scripts`
* Dependabot configuration for migrations-tooling (disabled for now)
* Updates test configuration for migrations-tooling
* Shorter configuration for intermediate DB for now. We will add the rest table by table.
* Adds a couple of benchmark scripts
* RSpec setup especially for migrations-tooling and the first tests
* Adds sorting/formatting to the `generate_schema` script
This commit is contained in:
Gerhard Schlager
2024-04-15 18:47:40 +02:00
committed by GitHub
parent 831da05103
commit d286c1d5a1
34 changed files with 1287 additions and 396 deletions

View File

@@ -0,0 +1,203 @@
#!/usr/bin/env ruby
# frozen_string_literal: true
# Benchmark: bulk-insert write throughput of SQLite3 vs. Extralite vs. DuckDB.
require "bundler/inline"
require "benchmark"
require "tempfile"
# Installs and loads the benchmarked gems in-process; no Gemfile needed.
gemfile(true) do
  source "https://rubygems.org"
  gem "extralite-bundle", require: "extralite"
  gem "sqlite3"
  gem "duckdb"
end
# Number of rows each benchmark inserts.
ROW_COUNT = 50_000_000
# The (text, integer) payload inserted for every row.
SOME_DATA = ["The quick, brown fox jumps over a lazy dog.", 1_234_567_890]
# Yields the path of a freshly created temporary file and guarantees the
# file is closed and deleted afterwards, even if the block raises.
# Returns whatever the block returns.
def with_db_path
  file = Tempfile.new
  begin
    yield file.path
  ensure
    file.close
    file.unlink
  end
end
# SQLite-based benchmarks. Both implementations insert rows through a
# prepared statement and group inserts into transactions of
# TRANSACTION_SIZE statements so the measurement is not dominated by
# per-row transaction/fsync overhead.
module Sqlite
  # Number of INSERT statements grouped into one transaction.
  TRANSACTION_SIZE = 1000

  CREATE_TABLE_SQL = <<~SQL
    CREATE TABLE foo
    (
      id INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT,
      some_text TEXT,
      some_number INTEGER
    )
  SQL

  INSERT_SQL = "INSERT INTO foo (some_text, some_number) VALUES (?, ?)"

  # Benchmark using the `sqlite3` gem.
  class Sqlite3Benchmark
    # @param row_count [Integer] number of rows to insert
    def initialize(row_count)
      @row_count = row_count
      @tempfile = Tempfile.new
      @connection = SQLite3::Database.new(@tempfile.path)

      # Tuned for raw write throughput; durability is irrelevant here.
      @connection.journal_mode = "wal"
      @connection.synchronous = "off"
      @connection.temp_store = "memory"
      @connection.locking_mode = "normal"
      @connection.cache_size = -10_000 # negative value: size in KiB (~10 MB)

      @connection.execute(CREATE_TABLE_SQL)
      @stmt = @connection.prepare(INSERT_SQL)
      @statement_counter = 0
    end

    # Inserts all rows, then releases every resource.
    def run
      @row_count.times { insert(SOME_DATA) }
      close
    end

    private

    def insert(*parameters)
      begin_transaction if @statement_counter == 0
      @stmt.execute(*parameters)

      # FIX: use >= so each transaction holds exactly TRANSACTION_SIZE
      # statements (the previous `>` committed after 1001 inserts).
      if (@statement_counter += 1) >= TRANSACTION_SIZE
        commit_transaction
        @statement_counter = 0
      end
    end

    def begin_transaction
      return if @connection.transaction_active?
      @connection.transaction(:deferred)
    end

    def commit_transaction
      return unless @connection.transaction_active?
      @connection.commit
    end

    # Commits any open transaction and closes statement, DB and temp file.
    def close
      commit_transaction
      @stmt.close
      @connection.close
      @tempfile.close
      @tempfile.unlink
    end
  end

  # Benchmark using the `extralite` gem (same batching strategy as above).
  class ExtraliteBenchmark
    # @param row_count [Integer] number of rows to insert
    def initialize(row_count)
      @row_count = row_count
      @tempfile = Tempfile.new
      @connection = Extralite::Database.new(@tempfile.path)

      @connection.pragma(
        journal_mode: "wal",
        synchronous: "off",
        temp_store: "memory",
        locking_mode: "normal",
        cache_size: -10_000, # negative value: size in KiB (~10 MB)
      )

      @connection.execute(CREATE_TABLE_SQL)
      @stmt = @connection.prepare(INSERT_SQL)
      @statement_counter = 0
    end

    # Inserts all rows, then releases every resource.
    def run
      @row_count.times { insert(SOME_DATA) }
      close
    end

    private

    def insert(*parameters)
      begin_transaction if @statement_counter == 0
      @stmt.execute(*parameters)

      # FIX: >= keeps transactions at exactly TRANSACTION_SIZE statements.
      if (@statement_counter += 1) >= TRANSACTION_SIZE
        commit_transaction
        @statement_counter = 0
      end
    end

    def begin_transaction
      return if @connection.transaction_active?
      @connection.execute("BEGIN DEFERRED TRANSACTION")
    end

    def commit_transaction
      return unless @connection.transaction_active?
      @connection.execute("COMMIT")
    end

    # Commits any open transaction and closes statement, DB and temp file.
    def close
      commit_transaction
      @stmt.close
      @connection.close
      @tempfile.close
      @tempfile.unlink
    end
  end
end
# Benchmark using the `duckdb` gem and its bulk Appender API.
class DuckDbBenchmark
  CREATE_TABLE_SQL = <<~SQL
    CREATE TABLE foo
    (
      id INTEGER NOT NULL PRIMARY KEY,
      some_text TEXT,
      some_number INTEGER
    )
  SQL

  # @param row_count [Integer] number of rows to insert
  def initialize(row_count)
    @row_count = row_count
    @tempfile = Tempfile.new
    # Delete the empty file Tempfile created and let DuckDB create its own
    # database file at the reserved path (presumably DuckDB rejects the
    # pre-existing zero-byte file -- TODO confirm).
    # FIX: use File.delete instead of FileUtils.rm -- `fileutils` was never
    # required, so FileUtils was only available by accident.
    File.delete(@tempfile.path)
    @db = DuckDB::Database.open(@tempfile.path)
    @connection = @db.connect
    @connection.query(CREATE_TABLE_SQL)
    @appender = @connection.appender("foo")
  end

  # Appends all rows and releases every resource. The appender needs an
  # explicit primary-key value, so rows are numbered 0...row_count here.
  def run
    @row_count.times do |id|
      @appender.begin_row
      @appender.append(id)
      @appender.append(SOME_DATA[0])
      @appender.append(SOME_DATA[1])
      @appender.end_row
    end
    close
  end

  private

  def close
    @appender.close
    @connection.close
    @db.close
    # FIX: remove the temporary database file, matching the cleanup done by
    # the SQLite-based benchmark classes.
    @tempfile.close
    @tempfile.unlink
  end
end
# Run all three write benchmarks and print their timings side by side.
# NOTE: ROW_COUNT is 50M rows, so a full run takes a while.
Benchmark.bm(15) do |x|
  x.report("SQLite3") { Sqlite::Sqlite3Benchmark.new(ROW_COUNT).run }
  x.report("Extralite") { Sqlite::ExtraliteBenchmark.new(ROW_COUNT).run }
  x.report("DuckDB") { DuckDbBenchmark.new(ROW_COUNT).run }
end

View File

@@ -0,0 +1,114 @@
#!/usr/bin/env ruby
# frozen_string_literal: true
# Benchmark: single-row INSERT throughput of Extralite vs. SQLite3 with
# different parameter-binding styles (positional, hash, Data object).
require "bundler/inline"
gemfile(true) do
  source "https://rubygems.org"
  gem "benchmark-ips"
  gem "extralite-bundle", github: "digital-fabric/extralite"
  gem "sqlite3"
end
require "extralite"
require "benchmark/ips"
require "time"
require "securerandom"

# Immutable value object (Ruby 3.2+) used for positional/Data binding.
User = Data.define(:id, :name, :email, :created_at)
# One sample row as a hash (for named-parameter binding)...
USER_HASH =
  begin
    name = SecureRandom.hex(10)
    { id: 1, name: name, email: "#{name}@example.com", created_at: Time.now.utc.iso8601 }
  end
# ...and the same row as a Data instance.
USER_DATA =
  User.new(
    id: USER_HASH[:id],
    name: USER_HASH[:name],
    email: USER_HASH[:email],
    created_at: USER_HASH[:created_at],
  )

SQL_TABLE = <<~SQL
  CREATE TABLE users (
    id INTEGER,
    name TEXT,
    email TEXT,
    created_at DATETIME
  )
SQL
SQL_INSERT = "INSERT INTO users VALUES (?, ?, ?, ?)"
SQL_INSERT_NAMED = "INSERT INTO users VALUES (:id, :name, :email, :created_at)"
# Builds an in-memory Extralite database with the users table created.
def create_extralite_db
  Extralite::Database.new(":memory:").tap { |db| db.execute(SQL_TABLE) }
end

# Builds an in-memory SQLite3 database with the users table created.
def create_sqlite3_db
  SQLite3::Database.new(":memory:").tap { |db| db.execute(SQL_TABLE) }
end
# Generates `row_count` sample user hashes with random names and matching
# example.com email addresses; ids run 0...row_count.
# FIX: the block body was empty (`map { |id| }`), so the method returned an
# array of nils. Restored the row-building body used by the sibling
# insert-rows benchmark script.
def create_users(row_count)
  row_count.times.map do |id|
    name = SecureRandom.hex(10)
    { id: id, name: name, email: "#{name}@example.com", created_at: Time.now.utc.iso8601 }
  end
end
# Thin wrappers around Statement#execute so the benchmark-ips reports
# isolate the cost of each parameter-binding style.

# Positional binding from a Data object's attributes.
def insert_extralite_regular(stmt, user) = stmt.execute(user.id, user.name, user.email, user.created_at)

# Named binding from a Hash.
def insert_extralite_hash(stmt, user) = stmt.execute(user)

# Binding a Data object (or its array form) directly.
def insert_extralite_data(stmt, user) = stmt.execute(user)

# Positional binding from a Data object's attributes.
def insert_sqlite3_regular(stmt, user) = stmt.execute(user.id, user.name, user.email, user.created_at)

# Named binding from a Hash.
def insert_sqlite3_hash(stmt, user) = stmt.execute(user)
puts "",
"Extralite SQLite version: #{Extralite.sqlite3_version}",
"SQLite version: #{SQLite3::SQLITE_VERSION}",
""
extralite_db = create_extralite_db
extralite_stmt_regular = extralite_db.prepare(SQL_INSERT)
extralite_stmt_named = extralite_db.prepare(SQL_INSERT_NAMED)
sqlite3_db = create_sqlite3_db
sqlite3_stmt_regular = sqlite3_db.prepare(SQL_INSERT)
sqlite3_stmt_named = sqlite3_db.prepare(SQL_INSERT_NAMED)
Benchmark.ips do |x|
x.config(time: 10, warmup: 2)
x.report("Extralite regular") { insert_extralite_regular(extralite_stmt_regular, USER_DATA) }
x.report("Extralite hash") { insert_extralite_hash(extralite_stmt_named, USER_HASH) }
x.report("Extralite data") { insert_extralite_data(extralite_stmt_regular, USER_DATA) }
x.report("Extralite data/array") do
insert_extralite_data(extralite_stmt_regular, USER_DATA.deconstruct)
end
x.report("SQLite3 regular") { insert_sqlite3_regular(sqlite3_stmt_regular, USER_DATA) }
x.report("SQLite3 hash") { insert_sqlite3_hash(sqlite3_stmt_named, USER_HASH) }
x.report("SQLite3 data/hash") { insert_sqlite3_hash(sqlite3_stmt_named, USER_DATA.to_h) }
x.compare!
end
extralite_stmt_regular.close
extralite_stmt_named.close
extralite_db.close
sqlite3_stmt_regular.close
sqlite3_stmt_named.close
sqlite3_db.close

View File

@@ -0,0 +1,105 @@
#!/usr/bin/env ruby
# frozen_string_literal: true
# Benchmark: bulk INSERT throughput of Extralite vs. SQLite3 with
# positional, named, index-hash and array parameter binding.
require "bundler/inline"
gemfile(true) do
  source "https://rubygems.org"
  gem "benchmark-ips"
  gem "extralite-bundle", github: "digital-fabric/extralite"
  gem "sqlite3"
end
require "extralite"
require "benchmark/ips"
require "time"
require "securerandom"

SQL_TABLE = <<~SQL
  CREATE TABLE users (
    id INTEGER,
    name TEXT,
    email TEXT,
    created_at DATETIME
  )
SQL
SQL_INSERT = "INSERT INTO users VALUES (?, ?, ?, ?)"
SQL_INSERT_NAMED = "INSERT INTO users VALUES (:id, :name, :email, :created_at)"
# Builds an in-memory Extralite database with the users table created.
def create_extralite_db
  Extralite::Database.new(":memory:").tap { |db| db.execute(SQL_TABLE) }
end

# Builds an in-memory SQLite3 database with the users table created.
def create_sqlite3_db
  SQLite3::Database.new(":memory:").tap { |db| db.execute(SQL_TABLE) }
end
# Generates `row_count` sample user hashes with random names and matching
# example.com email addresses; ids run 0...row_count.
def create_users(row_count)
  Array.new(row_count) do |id|
    name = SecureRandom.hex(10)
    { id: id, name: name, email: "#{name}@example.com", created_at: Time.now.utc.iso8601 }
  end
end
# Thin wrappers around Statement#execute so the benchmark-ips reports
# isolate the cost of each parameter-binding style.

# Positional binding: one execute call per user with explicit values.
def insert_extralite_regular(stmt, users)
  users.each do |u|
    stmt.execute(u[:id], u[:name], u[:email], u[:created_at])
  end
end

# Index-hash (or array) binding: the collection element is passed through.
def insert_extralite_index(stmt, users)
  users.each do |u|
    stmt.execute(u)
  end
end

# Named binding: the hash is passed through as-is.
def insert_extralite_named(stmt, users)
  users.each do |u|
    stmt.execute(u)
  end
end

# Positional binding for the sqlite3 gem.
def insert_sqlite3_regular(stmt, users)
  users.each do |u|
    stmt.execute(u[:id], u[:name], u[:email], u[:created_at])
  end
end

# Named binding for the sqlite3 gem.
def insert_sqlite3_named(stmt, users)
  users.each do |u|
    stmt.execute(u)
  end
end
# Print the SQLite versions both gems link against (they may differ).
puts "",
     "Extralite SQLite version: #{Extralite.sqlite3_version}",
     "SQLite version: #{SQLite3::SQLITE_VERSION}",
     ""

# Prepare one positional and one named statement per library up front so
# statement preparation is excluded from the measurement.
extralite_db = create_extralite_db
extralite_stmt_regular = extralite_db.prepare(SQL_INSERT)
extralite_stmt_named = extralite_db.prepare(SQL_INSERT_NAMED)

sqlite3_db = create_sqlite3_db
sqlite3_stmt_regular = sqlite3_db.prepare(SQL_INSERT)
sqlite3_stmt_named = sqlite3_db.prepare(SQL_INSERT_NAMED)

users = create_users(1_000)
# The same rows keyed by parameter position (presumably Extralite binds
# 1-based index hashes -- see the "Extralite index" report) ...
users_indexed =
  users.map do |user|
    { 1 => user[:id], 2 => user[:name], 3 => user[:email], 4 => user[:created_at] }
  end
# ... and as plain positional arrays.
users_array = users.map { |user| [user[:id], user[:name], user[:email], user[:created_at]] }

# Each iteration inserts all 1,000 rows into the in-memory databases.
Benchmark.ips do |x|
  x.config(time: 10, warmup: 2)
  x.report("Extralite regular") { insert_extralite_regular(extralite_stmt_regular, users) }
  x.report("Extralite named") { insert_extralite_named(extralite_stmt_named, users) }
  x.report("Extralite index") { insert_extralite_index(extralite_stmt_regular, users_indexed) }
  x.report("Extralite array") { insert_extralite_index(extralite_stmt_regular, users_array) }
  x.report("SQLite3 regular") { insert_sqlite3_regular(sqlite3_stmt_regular, users) }
  x.report("SQLite3 named") { insert_sqlite3_named(sqlite3_stmt_named, users) }
  x.compare!
end

# Release prepared statements and connections.
extralite_stmt_regular.close
extralite_stmt_named.close
extralite_db.close
sqlite3_stmt_regular.close
sqlite3_stmt_named.close
sqlite3_db.close

View File

@@ -0,0 +1,25 @@
#!/usr/bin/env ruby
# frozen_string_literal: true
# Benchmark: different ways of formatting a UTC timestamp as ISO 8601.
require "bundler/inline"
gemfile(true) do
  source "https://rubygems.org"
  gem "benchmark-ips"
end
require "benchmark/ips"
require "time"

# Fixed timestamps so each iteration measures formatting only.
# DateTime is provided by the `date` library, which `require "time"` loads.
THE_TIME = Time.now.utc
DATE_TIME = DateTime.now.new_offset(0)

Benchmark.ips do |x|
  x.config(time: 10, warmup: 2)
  x.report("Time#iso8601") { THE_TIME.iso8601 }
  # Explicit strftime format equivalent to iso8601 for UTC times.
  x.report("Time#strftime") { THE_TIME.strftime("%FT%TZ") }
  x.report("DateTime#iso8601") { DATE_TIME.iso8601 }
  x.compare!
end

View File

@@ -0,0 +1,186 @@
#!/usr/bin/env ruby
# frozen_string_literal: true
# Benchmark: concurrent write strategies with Extralite -- a single writer
# vs. forked writers sharing one database vs. forked writers with one
# database each (merged afterwards).
require "bundler/inline"
gemfile(true) do
  source "https://rubygems.org"
  gem "extralite-bundle", github: "digital-fabric/extralite"
end
require "etc"
require "extralite"
require "tempfile"

SQL_TABLE = <<~SQL
  CREATE TABLE users (
    id INTEGER,
    name TEXT,
    email TEXT,
    created_at DATETIME
  )
SQL
SQL_INSERT = "INSERT INTO users VALUES (?, ?, ?, ?)"

# A fixed sample row; all writers insert copies of it.
USER = [1, "John", "john@example.com", "2023-12-29T11:10:04Z"]
# Scale the workload with the core count so the row count divides evenly
# among Etc.nprocessors forked writers.
ROW_COUNT = Etc.nprocessors * 200_000
# Opens (and optionally initializes) an Extralite database tuned for
# concurrent benchmark writes.
#
# @param path [String] database file path
# @param initialize [Boolean] create the users table when true
# @return [Extralite::Database]
def create_extralite_db(path, initialize: false)
  Extralite::Database
    .new(path)
    .tap do |db|
      db.pragma(
        busy_timeout: 60_000, # wait up to 60 seconds on a locked database
        journal_mode: "wal",
        synchronous: "off",
      )
      db.execute(SQL_TABLE) if initialize
    end
end
# Creates a temporary database file, initializes its schema, and yields the
# file path. After the block returns, the database is re-opened and a
# warning is printed when the row count does not match ROW_COUNT (sanity
# check for the forked writers). The file is removed even if the block
# raises.
def with_db_path
  tempfile = Tempfile.new
  db = create_extralite_db(tempfile.path, initialize: true)
  db.close

  yield tempfile.path

  # Verify that all writers actually persisted their rows.
  db = create_extralite_db(tempfile.path)
  row_count = db.query_single_value("SELECT COUNT(*) FROM users")
  puts "Row count: #{row_count}" if row_count != ROW_COUNT
  db.close
ensure
  tempfile.close
  tempfile.unlink
end
# Baseline scenario: one process, one connection, sequential inserts.
class SingleWriter
  # @param db_path [String] path of the pre-initialized database
  # @param row_count [Integer] number of rows to insert
  def initialize(db_path, row_count)
    @row_count = row_count
    @connection = create_extralite_db(db_path)
    @insert = @connection.prepare(SQL_INSERT)
  end

  # Inserts all rows, then releases the statement and the connection.
  def write
    @row_count.times { @insert.execute(USER) }
    @insert.close
    @connection.close
  end
end
# One writer process per CPU core, all appending to the SAME database file.
# Each fork opens its own connection and parks in `sleep` until the parent
# sends SIGUSR1, so all writers start simultaneously and setup cost is
# excluded from the measurement.
class ForkedSameDbWriter
  def initialize(db_path, row_count)
    @row_count = row_count
    @db_path = db_path
    @pids = []
    setup_forks
  end

  def setup_forks
    fork_count = Etc.nprocessors
    # Rows are split evenly; any remainder of the integer division is
    # dropped (ROW_COUNT is a multiple of nprocessors, so none here).
    split_row_count = @row_count / fork_count

    fork_count.times do
      @pids << fork do
        db = create_extralite_db(@db_path)
        stmt = db.prepare(SQL_INSERT)

        # The actual work happens inside the signal handler; `exit` also
        # terminates the `sleep` below.
        Signal.trap("USR1") do
          split_row_count.times { stmt.execute(USER) }
          stmt.close
          db.close
          exit
        end

        sleep
      end
    end

    # NOTE(review): fixed delay assumes every fork has installed its trap
    # handler within 1s -- a race on heavily loaded machines.
    sleep(1)
  end

  # Fires all writers at once and waits for every child to finish.
  def write
    @pids.each { |pid| Process.kill("USR1", pid) }
    Process.waitall
  end
end
# One writer process per CPU core, each writing to its OWN database file.
# Afterwards the per-fork databases are merged into the main database via
# ATTACH ... INSERT INTO ... SELECT.
class ForkedMultiDbWriter
  def initialize(db_path, row_count)
    @row_count = row_count
    @complete_db_path = db_path
    @pids = []
    @db_paths = []
    # Connection to the target database, used for the final merge.
    @db = create_extralite_db(db_path)
    setup_forks
  end

  def setup_forks
    fork_count = Etc.nprocessors
    # Rows are split evenly across forks (integer division).
    split_row_count = @row_count / fork_count

    fork_count.times do |i|
      # Every fork gets its own database file next to the main one.
      db_path = "#{@complete_db_path}-#{i}"
      @db_paths << db_path

      @pids << fork do
        db = create_extralite_db(db_path, initialize: true)
        stmt = db.prepare(SQL_INSERT)

        # Work starts only when the parent sends SIGUSR1.
        Signal.trap("USR1") do
          split_row_count.times { stmt.execute(USER) }
          stmt.close
          db.close
          exit
        end

        sleep
      end
    end

    # NOTE(review): fixed delay assumes every fork reaches `sleep` within
    # 2s -- not guaranteed on loaded machines.
    sleep(2)
  end

  # Fires all writers, waits for them, then merges each per-fork database
  # into the main database. The merge is part of the measured time.
  def write
    @pids.each { |pid| Process.kill("USR1", pid) }
    Process.waitall

    @db_paths.each do |db_path|
      @db.execute("ATTACH DATABASE ? AS db", db_path)
      @db.execute("INSERT INTO users SELECT * FROM db.users")
      @db.execute("DETACH DATABASE db")
    end

    @db.close
  end
end
LABEL_WIDTH = 25

# Prints "<label> ..." while the block runs, then overwrites the line with
# the elapsed wall-clock time (monotonic clock) in seconds.
#
# @param label [String] scenario name shown in the output
# @param label_width [Integer] column width the label is padded to
def benchmark(label, label_width = 15)
  print "#{label} ..."
  label = label.ljust(label_width)

  started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)
  yield
  elapsed = Process.clock_gettime(Process::CLOCK_MONOTONIC) - started_at

  formatted = format("%.4f", elapsed).rjust(9)
  print "\r#{label} #{formatted} seconds\n"
end
puts "", "Benchmarking write performance", ""

# Each scenario gets a fresh temporary database from with_db_path. Writer
# construction (which includes forking and its setup sleeps) intentionally
# happens outside the timed block.
with_db_path do |db_path|
  single_writer = SingleWriter.new(db_path, ROW_COUNT)
  benchmark("single writer", LABEL_WIDTH) { single_writer.write }
end

with_db_path do |db_path|
  forked_same_db_writer = ForkedSameDbWriter.new(db_path, ROW_COUNT)
  benchmark("forked writer - same DB", LABEL_WIDTH) { forked_same_db_writer.write }
end

with_db_path do |db_path|
  forked_multi_db_writer = ForkedMultiDbWriter.new(db_path, ROW_COUNT)
  benchmark("forked writer - multi DB", LABEL_WIDTH) { forked_multi_db_writer.write }
end

View File

@@ -7,13 +7,15 @@
# It accepts an optional command line argument for the output file path which
# overrides the path configured in schema.yml
puts "Loading application..."
require_relative "../../config/environment"
require_relative "../lib/migrations"
module Migrations
load_rails_environment
load_gemfiles("common")
class SchemaGenerator
def initialize(opts = {})
config = YAML.load_file(File.join(__dir__, "schema.yml"), symbolize_names: true)
config = load_config
@core_db_connection = ActiveRecord::Base.connection
@output_stream = StringIO.new
@@ -25,7 +27,7 @@ module Migrations
@column_configs = config[:columns]
@configured_table_names = @table_configs&.keys&.sort || []
@global_column_ignore_list = @column_configs[:ignore] || []
@global_column_ignore_list = @column_configs&.fetch(:ignore) || []
end
def run
@@ -35,13 +37,21 @@ module Migrations
generate_tables
generate_indirectly_ignored_columns_log
generate_migration_file
validate_migration_file
puts "", "Done"
end
private
def load_config
path = File.expand_path("../config/intermediate_db.yml", __dir__)
YAML.load_file(path, symbolize_names: true)
end
def generate_header
return if @configured_table_names.empty?
@output_stream.puts <<~HEADER
/*
This file is auto-generated from the Discourse core database schema. Instead of editing it directly,
@@ -61,6 +71,8 @@ module Migrations
end
def generate_indirectly_ignored_columns_log
return if @indirectly_ignored_columns.empty?
puts "Generating indirectly ignored column list..."
@output_stream.puts "\n\n/*"
@@ -142,7 +154,8 @@ module Migrations
end
@output_stream.puts ""
@output_stream.puts "CREATE TABLE #{name} ("
@output_stream.puts "CREATE TABLE #{name}"
@output_stream.puts "("
if !composite_key && primary_key.present?
primary_key_column = column_records.find { |c| c.name == primary_key }
@@ -166,7 +179,9 @@ module Migrations
column_definitions << generate_column_definition(column)
end
column_definitions << " PRIMARY KEY (#{primary_key.join(", ")})" if composite_key
format_columns!(column_definitions)
column_definitions << " PRIMARY KEY (#{primary_key.join(", ")})" if composite_key
@output_stream.puts column_definitions.join(",\n")
@output_stream.puts ");"
@@ -175,6 +190,36 @@ module Migrations
indexes.each { |index| generate_index(name, index) }
end
def validate_migration_file
db = Extralite::Database.new(":memory:")
if (sql = @output_stream.string).blank?
warn "No SQL generated, skipping validation".red
else
db.execute(sql)
end
ensure
db.close if db
end
def format_columns!(column_definitions)
column_definitions.map! do |c|
c.match(
/^\s*(?<name>\w+)\s(?<datatype>\w+)\s?(?<nullable>NOT NULL)?\s?(?<primary_key>PRIMARY KEY)?/,
).named_captures
end
max_name_length = column_definitions.map { |c| c["name"].length }.max
max_datatype_length = column_definitions.map { |c| c["datatype"].length }.max
column_definitions.sort_by! do |c|
[c["primary_key"] ? 0 : 1, c["nullable"] ? 0 : 1, c["name"]]
end
column_definitions.map! do |c|
" #{c["name"].ljust(max_name_length)} #{c["datatype"].ljust(max_datatype_length)} #{c["nullable"]} #{c["primary_key"]}".rstrip
end
end
class CustomColumn
attr_reader :name