DEV: Add script to generate schema for intermediate DB (#24484)

This will be used by migration scripts.
This commit is contained in:
Selase Krakani
2024-01-22 17:45:52 +00:00
committed by GitHub
parent 4f901cae8f
commit 93264da206
5 changed files with 1127 additions and 0 deletions

View File

@ -0,0 +1,481 @@
## Configuration options for the base intermediate schema generator
##
## After modifying this file, regenerate the base intermediate schema
## by running the `generate_schema` script.
# Default relative path for generated base schema file.
# An absolute path can also be provided to the script as the first CLI argument.
# If the CLI argument is present, it takes precedence over the value specified here.
output_file_path: ../common/intermediate_db_schema/000_base_schema.sql
## Tables to include in the generated base intermediate schema.
##
## Available table options:
## virtual: Boolean. Enables the inclusion of a table in the schema solely based.
## on the provided configuration. A virtual table does not need to be available in the core schema.
## ignore: List of columns to ignore. Convenient if most of the table's column are needed.
## Usage is mutually exclusive with the `include` option. Only one should be used at a time.
## include: List of columns to include. Convenient if only a few columns are needed.
## Usage is mutually exclusive with the `include`` option. Only one should be used at a time.
## primary_key: Literal or list of columns to use as primary key.
## extend: List of objects describing columns to be added/extended.
## The following options are available for an "extend" object:
## name: Required. The name of the column being extended.
## is_null: Specifies if the column can be null.
## type: Column type. Defaults to TEXT.
## indexes: List of indexes to create. The following options are available for an "index" object:
## name: Index name.
## columns: List of column(s) to index.
tables:
schema_migrations:
virtual: true
primary_key: path
extend:
- name: path
is_null: false
- name: created_at
type: datetime
config:
virtual: true
primary_key: name
extend:
- name: name
is_null: false
- name: value
is_null: false
log_entries:
virtual: true
extend:
- name: created_at
type: datetime
is_null: false
- name: type
is_null: false
- name: message
is_null: false
- name: exception
- name: details
users:
ignore:
- seen_notification_id
- last_posted_at
- password_hash
- salt
- active
- last_emailed_at
- approved_by_id
- previous_visit_at
- suspended_at
- suspended_till
- views
- flag_level
- ip_address
- title
- uploaded_avatar_id
- locale
- primary_group_id
- first_seen_at
- silenced_till
- group_locked_trust_level
- manual_locked_trust_level
- secure_identifier
- flair_group_id
- last_seen_reviewable_id
- password_algorithm
- username_lower
extend:
- name: email
- name: created_at
is_null: true
- name: staged
is_null: true
- name: avatar_path
- name: avatar_url
- name: avatar_upload_id
- name: bio
- name: password
is_null: true
- name: trust_level
is_null: true
- name: suspension
- name: location
- name: website
- name: old_relative_url
- name: sso_record
- name: anonymized
type: boolean
- name: original_username
- name: timezone
- name: email_level
type: integer
- name: email_messages_level
type: integer
- name: email_digests
type: boolean
categories:
ignore:
- topic_id
- topic_count
- user_id
- topics_year
- topics_month
- topics_week
- auto_close_hours
- post_count
- latest_post_id
- latest_topic_id
- posts_year
- posts_month
- posts_week
- email_in
- email_in_allow_strangers
- topics_day
- posts_day
- allow_badges
- name_lower
- auto_close_based_on_last_post
- topic_template
- contains_messages
- sort_order
- sort_ascending
- uploaded_logo_id
- uploaded_background_id
- topic_featured_link_allowed
- all_topics_wiki
- show_subcategory_list
- num_featured_topics
- default_view
- subcategory_list_style
- default_top_period
- mailinglist_mirror
- minimum_required_tags
- navigate_to_first_post_after_read
- search_priority
- allow_global_tags
- reviewable_by_group_id
- read_only_banner
- default_list_filter
- allow_unlimited_owner_edits_on_first_post
- default_slow_mode_seconds
- uploaded_logo_dark_id
- uploaded_background_dark_id
extend:
- name: about_topic_title
- name: old_relative_url
- name: existing_id
type: integer
- name: permissions
type: json_text # JSON_TEXT ???
- name: logo_upload_id
- name: tag_group_ids
type: json_text # JSON_TEXT ???
topics:
ignore:
- last_posted_at
- posts_count
- last_post_user_id
- reply_count
- featured_user1_id
- featured_user2_id
- featured_user3_id
- featured_user4_id
- deleted_at
- highest_post_number
- like_count
- incoming_link_count
- moderator_posts_count
- bumped_at
- has_summary
- archetype
- notify_moderators_count
- spam_count
- score
- percent_rank
- slug
- deleted_by_id
- participant_count
- word_count
- excerpt
- fancy_title
- highest_staff_post_number
- featured_link
- reviewable_score
- image_upload_id
- slow_mode_seconds
- bannered_until
- external_id
extend:
- name: old_relative_url
- name: private_message
posts:
ignore:
- cooked
- reply_to_post_number
- reply_count
- quote_count
- deleted_at
- off_topic_count
- incoming_link_count
- bookmark_count
- score
- reads
- post_type
- sort_order
- last_editor_id
- hidden
- hidden_reason_id
- notify_moderators_count
- spam_count
- illegal_count
- inappropriate_count
- last_version_at
- user_deleted
- reply_to_user_id
- percent_rank
- notify_user_count
- like_score
- deleted_by_id
- edit_reason
- word_count
- version
- cook_method
- wiki
- baked_at
- baked_version
- hidden_at
- self_edits
- reply_quoted
- via_email
- raw_email
- public_version
- action_code
- locked_by_id
- image_upload_id
- outbound_message_id
- qa_vote_count # TODO: added from plugin, maybe skip these automatically for core schema?
extend:
- name: reply_to_post_id # NOTE: should this be text??
- name: original_raw
- name: upload_ids
type: json_text
- name: post_number
type: integer
- name: old_relative_url
- name: accepted_answer
type: boolean
- name: small_action
- name: whisper
type: boolean
- name: placeholders
type: json_text
indexes:
- name: posts_by_topic_post_number
columns: [topic_id, post_number]
uploads:
ignore:
- original_filename
- filesize
- width
- height
- url
- created_at
- sha1
- origin
- retain_hours
- extension
- thumbnail_width
- thumbnail_height
- etag
- secure
- access_control_post_id
- original_sha1
- animated
- verification_status
- security_last_changed_at
- security_last_changed_reason
- dominant_color
extend:
- name: filename
is_null: false
- name: relative_path
- name: type
- name: data
type: blob
groups:
include:
- id
- name
- full_name
- visibility_level
- members_visibility_level
- mentionable_level
- messageable_level
extend:
- name: description
group_members:
virtual: true
primary_key: [group_id, user_id]
extend:
- name: group_id
type: integer
- name: user_id
type: integer
- name: owner
type: boolean
likes:
virtual: true
primary_key: [user_id, post_id]
extend:
- name: post_id
type: integer
is_null: false
- name: user_id
type: integer
is_null: false
- name: created_at
type: datetime
is_null: false
# TODO: Pending default values & auto incrementing id column
user_fields:
ignore:
- created_at
- external_name
- external_type
extend:
- name: options
type: json_text
muted_users:
primary_key: [user_id, muted_user_id]
ignore:
- id
- created_at
# NOTE: Perhaps use core's user_field_options instead?
user_field_values:
virtual: true
extend:
- name: user_id
type: integer
is_null: false
- name: field_id
type: integer
is_null: false
- name: is_multiselect_field
type: boolean
is_null: false
- name: value
indexes:
- name: user_field_values_multiselect
columns: [user_id, field_id, value]
unique: true
condition: WHERE is_multiselect_field = TRUE
- name: user_field_values_not_multiselect
columns: [user_id, field_id]
unique: true
condition: WHERE is_multiselect_field = FALSE
tags:
include:
- id
- name
extend:
- name: tag_group_id
type: integer
tag_groups:
include:
- id
- name
topic_tags:
primary_key: [topic_id, tag_id]
ignore:
- id
- created_at
tag_users:
primary_key: [tag_id, user_id]
ignore:
- id
- created_at
badges:
ignore:
- grant_count
- allow_title
- icon
- listable
- target_posts
- enabled
- auto_revoke
- trigger
- show_posts
- system
- image
- badge_grouping_id
extend:
- name: bage_group
user_badges:
include:
- user_id
- badge_id
- granted_at
topic_users:
primary_key: [user_id, topic_id]
ignore:
- id
- posted
- cleared_pinned_at
- last_emailed_post_number
- liked
- bookmarked
- last_posted_at
permalink_normalizations:
virtual: true
primary_key: normalization
extend:
- name: normalization
is_null: false
site_settings:
include:
- name
- value
extend:
- name: action
category_custom_fields:
primary_key: [category_id, name]
ignore:
- id
- created_at
post_custom_fields:
primary_key: [post_id, name]
ignore:
- id
- created_at
polls: {}
poll_options:
ignore:
- digest
- html
- anonymous_votes
extend:
- name: poll_id
is_null: false
- name: text
is_null: false
- name: position
type: integer
- name: created_at
is_null: true
poll_votes:
primary_key: [poll_option_id, user_id]
ignore: [poll_id]
extend:
- name: created_at
is_null: true
- name: poll_option_id
is_null: false
- name: user_id
is_null: false
## Schema-wide column configuration options. These options apply to all tables.
## See table specific column configuration options above.
##
## Available Options:
## ignore: List of core/plugin table columns to ignore and exclude from intermediate schema.
columns:
ignore:
- updated_at

View File

@ -0,0 +1,263 @@
#!/usr/bin/env ruby
# frozen_string_literal: true
# Generate the converter's base intermediate database migration file from
# the core database state and YAML configuration in schema.yml
# Invoke from core root directory as `./migrations/scripts/generate_schema`
# It accepts an optional command line argument for the output file path which
# overrides the path configured in schema.yml
puts "Loading application..."
require_relative "../../config/environment"
module Migrations
class SchemaGenerator
def initialize(opts = {})
config = YAML.load_file(File.join(__dir__, "schema.yml"), symbolize_names: true)
@core_db_connection = ActiveRecord::Base.connection
@output_stream = StringIO.new
@indirectly_ignored_columns = Hash.new { |h, k| h[k] = [] }
@output_file_path = opts[:output_file_path] || config[:output_file_path]
@table_configs = config[:tables]
@column_configs = config[:columns]
@configured_table_names = @table_configs&.keys&.sort || []
@global_column_ignore_list = @column_configs[:ignore] || []
end
def run
puts "Generating base converter migration file for Discourse #{Discourse::VERSION::STRING}"
generate_header
generate_tables
generate_indirectly_ignored_columns_log
generate_migration_file
puts "", "Done"
end
private
def generate_header
@output_stream.puts <<~HEADER
/*
This file is auto-generated from the Discourse core database schema. Instead of editing it directly,
please update the `schema.yml` configuration file and re-run the `generate_schema` script to update it.
*/
HEADER
end
def generate_tables
puts "Generating tables..."
@configured_table_names.each do |name|
raise "Core table named '#{name}' not found" unless valid_table?(name)
generate_table(name)
end
end
def generate_indirectly_ignored_columns_log
puts "Generating indirectly ignored column list..."
@output_stream.puts "\n\n/*"
@output_stream.puts <<~NOTE
Core table columns implicitly excluded from the generated schema above via the `include` configuration option
in `schema.yml`. This serves as an inventory of these columns, allowing new core additions to be tracked and,
if necessary, synchronized with the intermediate database schema.\n
NOTE
@indirectly_ignored_columns.each_with_index do |(table_name, columns), index|
next if virtual_table?(table_name) || columns.blank?
@output_stream.puts "" if index.positive?
@output_stream.puts "Table: #{table_name}"
@output_stream.puts "--------#{"-" * table_name.length}"
columns.each do |column|
@output_stream.puts " #{column.name} #{column.type} #{column.null}"
end
end
@output_stream.puts "*/"
end
def generate_migration_file
file_path = File.expand_path(@output_file_path, __dir__)
puts "Generating base migration file '#{file_path}'..."
File.open(file_path, "w") { |f| f << @output_stream.string.chomp }
end
def generate_column_definition(column)
definition = " #{column.name} #{type(column)}"
definition << " NOT NULL" unless column.null
definition
end
def generate_index(table_name, index)
@output_stream.print "CREATE "
@output_stream.print "UNIQUE " if index[:unique]
@output_stream.print "INDEX #{index[:name]} ON #{table_name} (#{index[:columns].join(", ")})"
@output_stream.print " #{index[:condition]}" if index[:condition].present?
@output_stream.puts ";"
end
def column_list_for(table_name)
ignore_columns = @table_configs.dig(table_name, :ignore) || []
include_columns = @table_configs.dig(table_name, :include) || []
include_columns.present? ? [:include, include_columns] : [:ignore, ignore_columns]
end
def generate_table(name)
puts "Generating #{name}..."
column_definitions = []
column_records = columns(name)
mode, column_list = column_list_for(name)
indexes = indexes(name)
configured_primary_key = primary_key(name)
primary_key, composite_key =
if configured_primary_key.present?
[configured_primary_key].flatten.each do |pk|
if column_records.map(&:name).exclude?(pk)
raise "Column named '#{pk}' does not exist in table '#{name}'"
end
end
[
configured_primary_key,
configured_primary_key.is_a?(Array) && configured_primary_key.length > 1,
]
else
virtual_table?(name) ? [] : [@core_db_connection.primary_key(name), false]
end
@output_stream.puts ""
@output_stream.puts "CREATE TABLE #{name} ("
if !composite_key && primary_key.present?
primary_key_column = column_records.find { |c| c.name == primary_key }
if (mode == :include && column_list.include?(primary_key_column.name)) ||
(mode == :ignore && column_list.exclude?(primary_key_column.name))
column_definitions << " #{primary_key_column.name} #{type(primary_key_column)} NOT NULL PRIMARY KEY"
end
end
column_records.each do |column|
next if @global_column_ignore_list.include?(column.name)
next if (mode == :ignore) && column_list.include?(column.name)
if !column.is_a?(CustomColumn) && (mode == :include) && column_list.exclude?(column.name)
@indirectly_ignored_columns[name] << column
next
end
next if !composite_key && (column.name == primary_key)
column_definitions << generate_column_definition(column)
end
column_definitions << " PRIMARY KEY (#{primary_key.join(", ")})" if composite_key
@output_stream.puts column_definitions.join(",\n")
@output_stream.puts ");"
@output_stream.puts "" if indexes.present?
indexes.each { |index| generate_index(name, index) }
end
class CustomColumn
attr_reader :name
def initialize(name, type, null)
@name = name
@raw_type = type
@raw_null = null
end
def type
@raw_type&.to_sym || :text
end
def null
@raw_null.nil? ? true : @raw_null
end
def merge!(other_column)
@raw_null = other_column.null if @raw_null.nil?
@raw_type ||= other_column.type
self
end
end
def columns(name)
extensions = column_extensions(name)
return extensions if virtual_table?(name)
default_columns = @core_db_connection.columns(name)
return default_columns if extensions.blank?
extended_columns =
default_columns.map do |default_column|
extension = extensions.find { |ext| ext.name == default_column.name }
if extension
extensions.delete(extension)
extension.merge!(default_column)
else
default_column
end
end
extended_columns + extensions
end
def column_extensions(name)
extensions = @table_configs.dig(name, :extend)
return [] if extensions.nil?
extensions.map { |column| CustomColumn.new(column[:name], column[:type], column[:is_null]) }
end
def type(column)
case column.type
when :string, :inet
"TEXT"
else
column.type.to_s.upcase
end
end
def valid_table?(name)
@core_db_connection.tables.include?(name.to_s) || virtual_table?(name)
end
def virtual_table?(name)
!!@table_configs.dig(name, :virtual)
end
def indexes(table_name)
@table_configs.dig(table_name, :indexes) || []
end
def primary_key(table_name)
@table_configs.dig(table_name, :primary_key)
end
end
end
Migrations::SchemaGenerator.new(output_file_path: ARGV.first).run