From 7398c3daf178a19e7539aa89fadee256ca0d4ae8 Mon Sep 17 00:00:00 2001 From: lihangyu <15605149486@163.com> Date: Wed, 29 Nov 2023 10:37:28 +0800 Subject: [PATCH] [Feature-Variant](Variant Type) support variant type query and index (#27676) --- be/src/common/config.cpp | 5 +- be/src/common/config.h | 7 +- be/src/olap/accept_null_predicate.h | 9 +- be/src/olap/base_tablet.cpp | 5 +- be/src/olap/bitmap_filter_predicate.h | 4 + be/src/olap/block_column_predicate.h | 18 + be/src/olap/bloom_filter_predicate.h | 4 + be/src/olap/column_predicate.h | 11 +- be/src/olap/compaction.cpp | 10 +- be/src/olap/comparison_predicate.h | 20 +- be/src/olap/delta_writer.cpp | 12 +- be/src/olap/delta_writer_v2.cpp | 1 + be/src/olap/field.h | 5 +- be/src/olap/in_list_predicate.h | 10 +- be/src/olap/iterators.h | 6 + be/src/olap/like_column_predicate.h | 4 + be/src/olap/match_predicate.cpp | 38 +- be/src/olap/match_predicate.h | 7 +- be/src/olap/null_predicate.cpp | 5 +- be/src/olap/null_predicate.h | 8 +- be/src/olap/push_handler.cpp | 2 + be/src/olap/reader.cpp | 28 +- be/src/olap/reader.h | 10 + be/src/olap/rowset/beta_rowset.cpp | 34 +- be/src/olap/rowset/beta_rowset_reader.cpp | 3 + be/src/olap/rowset/beta_rowset_writer.cpp | 146 +------ be/src/olap/rowset/beta_rowset_writer.h | 7 - be/src/olap/rowset/beta_rowset_writer_v2.cpp | 4 +- be/src/olap/rowset/rowset_reader_context.h | 2 + be/src/olap/rowset/rowset_writer_context.h | 1 + be/src/olap/rowset/segcompaction.cpp | 7 +- be/src/olap/rowset/segment_creator.cpp | 228 ++++++++-- be/src/olap/rowset/segment_creator.h | 19 +- .../olap/rowset/segment_v2/column_reader.cpp | 87 ++++ be/src/olap/rowset/segment_v2/column_reader.h | 34 ++ .../segment_v2/hierarchical_data_reader.cpp | 232 ++++++++++ .../segment_v2/hierarchical_data_reader.h | 237 ++++++++++ .../rowset/segment_v2/inverted_index_desc.cpp | 30 +- .../rowset/segment_v2/inverted_index_desc.h | 12 +- .../segment_v2/inverted_index_reader.cpp | 16 +- 
.../segment_v2/inverted_index_writer.cpp | 13 +- be/src/olap/rowset/segment_v2/segment.cpp | 233 ++++++++-- be/src/olap/rowset/segment_v2/segment.h | 61 ++- .../rowset/segment_v2/segment_iterator.cpp | 204 +++++++-- .../olap/rowset/segment_v2/segment_iterator.h | 46 +- .../olap/rowset/segment_v2/segment_writer.cpp | 11 +- .../segment_v2/vertical_segment_writer.cpp | 79 ++-- be/src/olap/rowset_builder.cpp | 1 + be/src/olap/schema.cpp | 20 +- be/src/olap/schema.h | 3 +- be/src/olap/schema_change.cpp | 6 +- be/src/olap/tablet.cpp | 6 +- be/src/olap/tablet_schema.cpp | 159 ++++++- be/src/olap/tablet_schema.h | 29 +- be/src/olap/task/index_builder.cpp | 8 +- be/src/pipeline/exec/scan_operator.cpp | 115 ++++- be/src/pipeline/exec/scan_operator.h | 15 + be/src/service/internal_service.cpp | 14 +- be/src/vec/columns/column_object.cpp | 131 +++--- be/src/vec/columns/column_object.h | 15 +- be/src/vec/columns/subcolumn_tree.h | 3 + be/src/vec/common/schema_util.cpp | 179 ++++++-- be/src/vec/common/schema_util.h | 20 +- be/src/vec/core/block.h | 29 -- be/src/vec/core/columns_with_type_and_name.h | 6 +- be/src/vec/data_types/data_type_decimal.h | 5 +- be/src/vec/data_types/data_type_factory.cpp | 12 +- be/src/vec/data_types/data_type_ipv4.h | 5 + be/src/vec/data_types/data_type_ipv6.h | 3 + be/src/vec/data_types/data_type_number_base.h | 5 + be/src/vec/data_types/data_type_object.cpp | 5 +- .../serde/data_type_jsonb_serde.cpp | 2 +- be/src/vec/exec/scan/new_olap_scan_node.cpp | 50 +++ be/src/vec/exec/scan/new_olap_scan_node.h | 8 + be/src/vec/exec/scan/new_olap_scanner.cpp | 63 ++- be/src/vec/exec/scan/new_olap_scanner.h | 2 + be/src/vec/exec/scan/scanner_scheduler.cpp | 2 +- be/src/vec/exec/scan/vscan_node.cpp | 69 ++- be/src/vec/exec/scan/vscan_node.h | 10 + be/src/vec/exprs/vcast_expr.cpp | 4 + be/src/vec/exprs/vcast_expr.h | 2 + be/src/vec/functions/function_cast.h | 15 +- be/src/vec/json/json_parser.cpp | 2 +- be/src/vec/olap/olap_data_convertor.cpp | 22 +- 
be/src/vec/olap/olap_data_convertor.h | 2 + docs/en/docs/admin-manual/config/be-config.md | 5 - .../docs/admin-manual/config/be-config.md | 5 - fe/fe-core/src/main/cup/sql_parser.cup | 27 ++ .../org/apache/doris/analysis/Analyzer.java | 57 ++- .../java/org/apache/doris/analysis/Expr.java | 4 + .../org/apache/doris/analysis/IndexDef.java | 3 +- .../apache/doris/analysis/InlineViewRef.java | 13 +- .../doris/analysis/InvertedIndexUtil.java | 2 +- .../org/apache/doris/analysis/QueryStmt.java | 2 + .../apache/doris/analysis/SelectListItem.java | 8 + .../org/apache/doris/analysis/SelectStmt.java | 18 +- .../doris/analysis/SetOperationStmt.java | 6 + .../apache/doris/analysis/SlotDescriptor.java | 39 +- .../org/apache/doris/analysis/SlotRef.java | 34 +- .../doris/analysis/TupleDescriptor.java | 9 + .../org/apache/doris/catalog/OlapTable.java | 9 + .../planner/MaterializedViewSelector.java | 14 +- .../doris/service/FrontendServiceImpl.java | 161 ------- fe/fe-core/src/main/jflex/sql_scanner.flex | 1 + gensrc/proto/internal_service.proto | 1 + gensrc/proto/olap_file.proto | 1 + gensrc/proto/segment_v2.proto | 2 +- gensrc/thrift/FrontendService.thrift | 19 - .../affinityByIssuesAndPRs1.out | 11 + .../affinityByIssuesAndPRs2.out | 6 + .../authorsWithTheMostPushes.out | 53 +++ .../countingStar1.out | 4 + .../countingStar2.out | 4 + .../countingStar3.out | 4 + .../distributionOfRepositoriesByStarCount.out | 5 + .../githubRoulette.out | 53 +++ ...tOfTopRepositoriesChangedOverTheYears1.out | 53 +++ ...tOfTopRepositoriesChangedOverTheYears2.out | 53 +++ ...tOfTopRepositoriesChangedOverTheYears3.out | 53 +++ ...tOfTopRepositoriesChangedOverTheYears4.out | 53 +++ ...tOfTopRepositoriesChangedOverTheYears5.out | 53 +++ ...tOfTopRepositoriesChangedOverTheYears6.out | 53 +++ ...asTheTotalNumberOfStarsChangedOverTime.out | 5 + .../issuesWithTheMostComments1.out | 4 + .../issuesWithTheMostComments2.out | 53 +++ .../issuesWithTheMostComments3.out | 53 +++ .../issuesWithTheMostComments4.out 
| 53 +++ .../issuesWithTheMostComments5.out | 53 +++ .../issuesWithTheMostComments6.out | 15 + .../issuesWithTheMostComments7.out | 53 +++ .../data/variant_github_events_p0/load.out | 7 + .../mostForkedRepositories.out | 53 +++ .../mostPopularCommentsOnGithub.out | 53 +++ ...organizationsByTheNumberOfRepositories.out | 9 + .../organizationsByTheNumberOfStars.out | 53 +++ .../proportionsBetweenStarsAndForks1.out | 53 +++ .../proportionsBetweenStarsAndForks2.out | 3 + .../proportionsBetweenStarsAndForks3.out | 5 + .../proportionsBetweenStarsAndForks4.out | 4 + .../proportionsBetweenStarsAndForks5.out | 4 + .../repositoriesByAmountOfModifiedCode.out | 53 +++ .../repositoriesByTheNumberOfPushes.out | 53 +++ ...toriesWithClickhouse_related_comments1.out | 15 + ...toriesWithClickhouse_related_comments2.out | 15 + ...epositoriesWithDoris_related_comments1.out | 9 + ...epositoriesWithDoris_related_comments2.out | 9 + .../repositoriesWithTheHighestGrowthYoY.out | 14 + ...ositoriesWithTheMaximumAmountOfIssues1.out | 53 +++ ...ositoriesWithTheMaximumAmountOfIssues2.out | 53 +++ ...ositoriesWithTheMaximumAmountOfIssues3.out | 9 + ...ositoriesWithTheMaximumAmountOfIssues4.out | 53 +++ ...iesWithTheMaximumAmountOfPullRequests1.out | 53 +++ ...iesWithTheMaximumAmountOfPullRequests2.out | 53 +++ ...hTheMaximumNumberOfAcceptedInvitations.out | 53 +++ ...iesWithTheMostPeopleWhoHavePushAccess1.out | 53 +++ ...epositoriesWithTheMostStarsOverOneDay1.out | 53 +++ ...itoriesWithTheMostSteadyGrowthOverTime.out | 53 +++ ...positoriesWithTheWorstStagnation_order.out | 9 + .../repositoryAffinityList1.out | 52 +++ .../repositoryAffinityList2.out | 9 + .../starsFromHeavyGithubUsers1.out | 31 ++ .../starsFromHeavyGithubUsers2.out | 10 + .../theLongestRepositoryNames1.out | 53 +++ .../theLongestRepositoryNames2.out | 53 +++ .../theMostToughCodeReviews.out | 53 +++ .../theTotalNumberOfRepositoriesOnGithub.out | 4 + .../theTotalNumberOfUsersOnGithub1.out | 4 + 
.../theTotalNumberOfUsersOnGithub2.out | 4 + .../theTotalNumberOfUsersOnGithub3.out | 4 + .../theTotalNumberOfUsersOnGithub4.out | 4 + .../topRepositoriesByStars.out | 53 +++ .../whatIsTheBestDayOfTheWeekToCatchAStar.out | 6 + .../whoAreAllThosePeopleGivingStars1.out | 53 +++ .../whoAreAllThosePeopleGivingStars2.out | 4 + .../whoAreAllThosePeopleGivingStars3.out | 41 ++ .../data/variant_p0/column_name.out | 22 + .../data/variant_p0/complexjson.out | 17 + regression-test/data/variant_p0/delete.json | 1 + .../data/variant_p0/delete_update.out | 10 + .../data/variant_p0/insert_into_select.out | 30 ++ regression-test/data/variant_p0/load.out | 261 ++++++++++++ regression-test/data/variant_p0/multi_var.out | 37 ++ .../data/variant_p0/schema_change.out | 49 +++ .../data/variant_p0/sql/gh_data.out | 65 +++ .../data/variant_p0/with_index/load.out | 28 ++ .../data/variant_p0/with_index/var_index.out | 10 + regression-test/data/variant_p2/load.out | 4 + .../sql/authorsWithTheMostPushes.out | 53 +++ .../data/variant_p2/sql/countingStar1.out | 4 + .../data/variant_p2/sql/countingStar2.out | 4 + .../data/variant_p2/sql/countingStar3.out | 4 + .../distributionOfRepositoriesByStarCount.out | 8 + .../data/variant_p2/sql/githubRoulette.out | 53 +++ ...tOfTopRepositoriesChangedOverTheYears1.out | 53 +++ ...tOfTopRepositoriesChangedOverTheYears2.out | 53 +++ ...tOfTopRepositoriesChangedOverTheYears3.out | 53 +++ ...tOfTopRepositoriesChangedOverTheYears4.out | 53 +++ ...tOfTopRepositoriesChangedOverTheYears5.out | 53 +++ ...tOfTopRepositoriesChangedOverTheYears6.out | 53 +++ ...tOfTopRepositoriesChangedOverTheYears7.out | 13 + ...asTheTotalNumberOfStarsChangedOverTime.out | 4 + .../sql/issuesWithTheMostComments1.out | 4 + .../sql/issuesWithTheMostComments2.out | 53 +++ .../sql/issuesWithTheMostComments3.out | 53 +++ .../sql/issuesWithTheMostComments4.out | 53 +++ .../sql/issuesWithTheMostComments5.out | 53 +++ .../sql/issuesWithTheMostComments6.out | 53 +++ 
.../sql/issuesWithTheMostComments7.out | 53 +++ .../sql/issuesWithTheMostComments8.out | 3 + .../variant_p2/sql/mostForkedRepositories.out | 53 +++ .../sql/mostPopularCommentsOnGithub.out | 4 + ...organizationsByTheNumberOfRepositories.out | 53 +++ .../sql/organizationsByTheNumberOfStars.out | 53 +++ .../sql/organizationsByTheSizeOfCommunity.out | 8 + .../sql/proportionsBetweenStarsAndForks1.out | 53 +++ .../sql/proportionsBetweenStarsAndForks2.out | 53 +++ .../sql/proportionsBetweenStarsAndForks3.out | 53 +++ .../sql/proportionsBetweenStarsAndForks4.out | 4 + .../sql/proportionsBetweenStarsAndForks5.out | 4 + ...toriesWithClickhouse_related_comments1.out | 53 +++ ...toriesWithClickhouse_related_comments2.out | 53 +++ ...epositoriesWithDoris_related_comments1.out | 53 +++ ...epositoriesWithDoris_related_comments2.out | 53 +++ .../repositoriesWithTheHighestGrowthYoY.out | 53 +++ ...ositoriesWithTheMaximumAmountOfIssues1.out | 53 +++ ...ositoriesWithTheMaximumAmountOfIssues2.out | 53 +++ ...ositoriesWithTheMaximumAmountOfIssues3.out | 53 +++ ...ositoriesWithTheMaximumAmountOfIssues4.out | 53 +++ ...iesWithTheMaximumAmountOfPullRequests1.out | 53 +++ ...iesWithTheMaximumAmountOfPullRequests2.out | 53 +++ ...hTheMaximumNumberOfAcceptedInvitations.out | 53 +++ ...iesWithTheMostPeopleWhoHavePushAccess1.out | 53 +++ ...iesWithTheMostPeopleWhoHavePushAccess2.out | 53 +++ ...iesWithTheMostPeopleWhoHavePushAccess3.out | 53 +++ ...epositoriesWithTheMostStarsOverOneDay1.out | 53 +++ ...epositoriesWithTheMostStarsOverOneDay2.out | 53 +++ ...epositoriesWithTheMostStarsOverOneDay3.out | 53 +++ ...itoriesWithTheMostSteadyGrowthOverTime.out | 53 +++ ...positoriesWithTheWorstStagnation_order.out | 53 +++ .../sql/repositoryAffinityList2.out | 53 +++ regression-test/data/variant_p2/sql/sql01.out | 3 + regression-test/data/variant_p2/sql/sql02.out | 4 + regression-test/data/variant_p2/sql/sql03.out | 4 + regression-test/data/variant_p2/sql/sql04.out | 13 + 
regression-test/data/variant_p2/sql/sql05.out | 4 + regression-test/data/variant_p2/sql/sql06.out | 4 + regression-test/data/variant_p2/sql/sql07.out | 16 + regression-test/data/variant_p2/sql/sql08.out | 13 + .../sql/theLongestRepositoryNames1.out | 53 +++ .../sql/theLongestRepositoryNames2.out | 53 +++ .../sql/theMostToughCodeReviews.out | 53 +++ .../theTotalNumberOfRepositoriesOnGithub.out | 4 + .../sql/theTotalNumberOfUsersOnGithub1.out | 4 + .../sql/theTotalNumberOfUsersOnGithub2.out | 4 + .../sql/theTotalNumberOfUsersOnGithub3.out | 4 + .../sql/theTotalNumberOfUsersOnGithub4.out | 4 + .../variant_p2/sql/topRepositoriesByStars.out | 53 +++ .../whatIsTheBestDayOfTheWeekToCatchAStar.out | 10 + .../sql/whoAreAllThosePeopleGivingStars1.out | 53 +++ .../sql/whoAreAllThosePeopleGivingStars2.out | 4 + .../suites/nereids_syntax_p0/explain.groovy | 2 +- .../affinityByIssuesAndPRs1.sql | 14 + .../affinityByIssuesAndPRs2.sql | 14 + .../authorsWithTheMostPushes.sql | 9 + .../countingStar1.sql | 1 + .../countingStar2.sql | 1 + .../countingStar3.sql | 1 + .../distributionOfRepositoriesByStarCount.sql | 14 + .../githubRoulette.sql | 1 + ...tOfTopRepositoriesChangedOverTheYears1.sql | 1 + ...tOfTopRepositoriesChangedOverTheYears2.sql | 1 + ...tOfTopRepositoriesChangedOverTheYears3.sql | 1 + ...tOfTopRepositoriesChangedOverTheYears4.sql | 1 + ...tOfTopRepositoriesChangedOverTheYears5.sql | 1 + ...tOfTopRepositoriesChangedOverTheYears6.sql | 1 + ...tOfTopRepositoriesChangedOverTheYears7.sql | 30 ++ ...asTheTotalNumberOfStarsChangedOverTime.sql | 2 + .../issuesWithTheMostComments1.sql | 1 + .../issuesWithTheMostComments2.sql | 1 + .../issuesWithTheMostComments3.sql | 17 + .../issuesWithTheMostComments4.sql | 9 + .../issuesWithTheMostComments5.sql | 9 + .../issuesWithTheMostComments6.sql | 11 + .../issuesWithTheMostComments7.sql | 9 + .../issuesWithTheMostComments8.sql | 13 + .../variant_github_events_p0/load.groovy | 73 ++++ .../mostForkedRepositories.sql | 1 + 
.../mostPopularCommentsOnGithub.sql | 1 + ...organizationsByTheNumberOfRepositories.sql | 14 + .../organizationsByTheNumberOfStars.sql | 8 + .../organizationsByTheSizeOfCommunity.sql | 23 + .../proportionsBetweenStarsAndForks1.sql | 17 + .../proportionsBetweenStarsAndForks2.sql | 18 + .../proportionsBetweenStarsAndForks3.sql | 18 + .../proportionsBetweenStarsAndForks4.sql | 13 + .../proportionsBetweenStarsAndForks5.sql | 21 + .../repositoriesByAmountOfModifiedCode.sql | 12 + .../repositoriesByTheNumberOfPushes.sql | 17 + ...toriesWithClickhouse_related_comments1.sql | 1 + ...toriesWithClickhouse_related_comments2.sql | 17 + ...epositoriesWithDoris_related_comments1.sql | 1 + ...epositoriesWithDoris_related_comments2.sql | 17 + .../repositoriesWithTheHighestGrowthYoY.sql | 20 + ...ositoriesWithTheMaximumAmountOfIssues1.sql | 1 + ...ositoriesWithTheMaximumAmountOfIssues2.sql | 18 + ...ositoriesWithTheMaximumAmountOfIssues3.sql | 19 + ...ositoriesWithTheMaximumAmountOfIssues4.sql | 18 + ...iesWithTheMaximumAmountOfPullRequests1.sql | 1 + ...iesWithTheMaximumAmountOfPullRequests2.sql | 1 + ...hTheMaximumNumberOfAcceptedInvitations.sql | 17 + ...iesWithTheMostPeopleWhoHavePushAccess1.sql | 13 + ...iesWithTheMostPeopleWhoHavePushAccess2.sql | 13 + ...iesWithTheMostPeopleWhoHavePushAccess3.sql | 16 + ...epositoriesWithTheMostStarsOverOneDay1.sql | 25 ++ ...epositoriesWithTheMostStarsOverOneDay2.sql | 25 ++ ...epositoriesWithTheMostStarsOverOneDay3.sql | 1 + ...itoriesWithTheMostSteadyGrowthOverTime.sql | 20 + ...positoriesWithTheWorstStagnation_order.sql | 20 + .../repositoryAffinityList1.sql | 13 + .../repositoryAffinityList2.sql | 23 + .../starsFromHeavyGithubUsers1.sql | 13 + .../starsFromHeavyGithubUsers2.sql | 15 + .../theLongestRepositoryNames1.sql | 1 + .../theLongestRepositoryNames2.sql | 1 + .../theMostToughCodeReviews.sql | 10 + .../theTotalNumberOfRepositoriesOnGithub.sql | 1 + .../theTotalNumberOfUsersOnGithub1.sql | 1 + .../theTotalNumberOfUsersOnGithub2.sql 
| 1 + .../theTotalNumberOfUsersOnGithub3.sql | 1 + .../theTotalNumberOfUsersOnGithub4.sql | 1 + .../variant_github_events_p0/topLabels1.sql | 9 + .../variant_github_events_p0/topLabels2.sql | 9 + .../variant_github_events_p0/topLabels3.sql | 14 + .../topRepositoriesByStars.sql | 1 + .../whatIsTheBestDayOfTheWeekToCatchAStar.sql | 1 + .../whoAreAllThosePeopleGivingStars1.sql | 1 + .../whoAreAllThosePeopleGivingStars2.sql | 1 + .../whoAreAllThosePeopleGivingStars3.sql | 13 + .../suites/variant_p0/column_name.groovy | 46 ++ .../suites/variant_p0/complexjson.groovy | 158 +++++++ .../suites/variant_p0/delete_update.groovy | 62 +++ .../variant_p0/insert_into_select.groovy | 52 +++ regression-test/suites/variant_p0/load.groovy | 403 ++++++++++++++++++ .../suites/variant_p0/multi_var.groovy | 43 ++ .../suites/variant_p0/schema_change.groovy | 76 ++++ .../suites/variant_p0/sql/gh_data.sql | 13 + .../suites/variant_p0/with_index/load.groovy | 101 +++++ .../variant_p0/with_index/var_index.groovy | 39 ++ .../variant_p2/github_events_advance.groovy | 112 +++++ .../sql/authorsWithTheMostPushes.sql | 9 + .../suites/variant_p2/sql/countingStar1.sql | 1 + .../suites/variant_p2/sql/countingStar2.sql | 1 + .../suites/variant_p2/sql/countingStar3.sql | 1 + .../distributionOfRepositoriesByStarCount.sql | 14 + .../suites/variant_p2/sql/githubRoulette.sql | 1 + ...tOfTopRepositoriesChangedOverTheYears1.sql | 1 + ...tOfTopRepositoriesChangedOverTheYears2.sql | 1 + ...tOfTopRepositoriesChangedOverTheYears3.sql | 1 + ...tOfTopRepositoriesChangedOverTheYears4.sql | 1 + ...tOfTopRepositoriesChangedOverTheYears5.sql | 1 + ...tOfTopRepositoriesChangedOverTheYears6.sql | 1 + ...tOfTopRepositoriesChangedOverTheYears7.sql | 29 ++ ...asTheTotalNumberOfStarsChangedOverTime.sql | 2 + .../sql/issuesWithTheMostComments1.sql | 1 + .../sql/issuesWithTheMostComments2.sql | 1 + .../sql/issuesWithTheMostComments3.sql | 17 + .../sql/issuesWithTheMostComments4.sql | 10 + .../sql/issuesWithTheMostComments5.sql 
| 9 + .../sql/issuesWithTheMostComments6.sql | 11 + .../sql/issuesWithTheMostComments7.sql | 9 + .../sql/issuesWithTheMostComments8.sql | 13 + .../variant_p2/sql/mostForkedRepositories.sql | 1 + .../sql/mostPopularCommentsOnGithub.sql | 1 + ...organizationsByTheNumberOfRepositories.sql | 14 + .../sql/organizationsByTheNumberOfStars.sql | 12 + .../sql/organizationsByTheSizeOfCommunity.sql | 24 ++ .../sql/proportionsBetweenStarsAndForks1.sql | 17 + .../sql/proportionsBetweenStarsAndForks2.sql | 18 + .../sql/proportionsBetweenStarsAndForks3.sql | 18 + .../sql/proportionsBetweenStarsAndForks4.sql | 13 + .../sql/proportionsBetweenStarsAndForks5.sql | 21 + ...toriesWithClickhouse_related_comments1.sql | 1 + ...toriesWithClickhouse_related_comments2.sql | 17 + ...epositoriesWithDoris_related_comments1.sql | 1 + ...epositoriesWithDoris_related_comments2.sql | 17 + .../repositoriesWithTheHighestGrowthYoY.sql | 19 + ...ositoriesWithTheMaximumAmountOfIssues1.sql | 1 + ...ositoriesWithTheMaximumAmountOfIssues2.sql | 18 + ...ositoriesWithTheMaximumAmountOfIssues3.sql | 19 + ...ositoriesWithTheMaximumAmountOfIssues4.sql | 18 + ...iesWithTheMaximumAmountOfPullRequests1.sql | 1 + ...iesWithTheMaximumAmountOfPullRequests2.sql | 1 + ...hTheMaximumNumberOfAcceptedInvitations.sql | 17 + ...iesWithTheMostPeopleWhoHavePushAccess1.sql | 13 + ...iesWithTheMostPeopleWhoHavePushAccess2.sql | 13 + ...iesWithTheMostPeopleWhoHavePushAccess3.sql | 16 + ...epositoriesWithTheMostStarsOverOneDay1.sql | 22 + ...epositoriesWithTheMostStarsOverOneDay2.sql | 22 + ...epositoriesWithTheMostStarsOverOneDay3.sql | 1 + ...itoriesWithTheMostSteadyGrowthOverTime.sql | 18 + ...positoriesWithTheWorstStagnation_order.sql | 19 + .../sql/repositoryAffinityList2.sql | 23 + .../suites/variant_p2/sql/sql01.sql | 1 + .../suites/variant_p2/sql/sql02.sql | 1 + .../suites/variant_p2/sql/sql03.sql | 1 + .../suites/variant_p2/sql/sql04.sql | 1 + .../suites/variant_p2/sql/sql05.sql | 1 + .../suites/variant_p2/sql/sql06.sql 
| 1 + .../suites/variant_p2/sql/sql07.sql | 2 + .../suites/variant_p2/sql/sql08.sql | 1 + .../sql/theLongestRepositoryNames1.sql | 1 + .../sql/theLongestRepositoryNames2.sql | 1 + .../sql/theMostToughCodeReviews.sql | 10 + .../theTotalNumberOfRepositoriesOnGithub.sql | 1 + .../sql/theTotalNumberOfUsersOnGithub1.sql | 1 + .../sql/theTotalNumberOfUsersOnGithub2.sql | 1 + .../sql/theTotalNumberOfUsersOnGithub3.sql | 1 + .../sql/theTotalNumberOfUsersOnGithub4.sql | 1 + .../suites/variant_p2/sql/topLabels1.sql | 10 + .../suites/variant_p2/sql/topLabels2.sql | 9 + .../suites/variant_p2/sql/topLabels3.sql | 14 + .../variant_p2/sql/topRepositoriesByStars.sql | 1 + .../whatIsTheBestDayOfTheWeekToCatchAStar.sql | 1 + .../sql/whoAreAllThosePeopleGivingStars1.sql | 1 + .../sql/whoAreAllThosePeopleGivingStars2.sql | 1 + .../affinityByIssuesAndPRs1.sql | 16 + .../affinityByIssuesAndPRs2.sql | 15 + .../repositoriesByAmountOfModifiedCode.sql | 13 + .../repositoriesByTheNumberOfPushes.sql | 18 + .../repositoryAffinityList1.sql | 14 + .../starsFromHeavyGithubUsers1.sql | 14 + .../starsFromHeavyGithubUsers2.sql | 16 + .../whoAreAllThosePeopleGivingStars3.sql | 17 + 433 files changed, 10443 insertions(+), 879 deletions(-) create mode 100644 be/src/olap/rowset/segment_v2/hierarchical_data_reader.cpp create mode 100644 be/src/olap/rowset/segment_v2/hierarchical_data_reader.h create mode 100644 regression-test/data/variant_github_events_p0/affinityByIssuesAndPRs1.out create mode 100644 regression-test/data/variant_github_events_p0/affinityByIssuesAndPRs2.out create mode 100644 regression-test/data/variant_github_events_p0/authorsWithTheMostPushes.out create mode 100644 regression-test/data/variant_github_events_p0/countingStar1.out create mode 100644 regression-test/data/variant_github_events_p0/countingStar2.out create mode 100644 regression-test/data/variant_github_events_p0/countingStar3.out create mode 100644 
regression-test/data/variant_github_events_p0/distributionOfRepositoriesByStarCount.out create mode 100644 regression-test/data/variant_github_events_p0/githubRoulette.out create mode 100644 regression-test/data/variant_github_events_p0/howHasTheListOfTopRepositoriesChangedOverTheYears1.out create mode 100644 regression-test/data/variant_github_events_p0/howHasTheListOfTopRepositoriesChangedOverTheYears2.out create mode 100644 regression-test/data/variant_github_events_p0/howHasTheListOfTopRepositoriesChangedOverTheYears3.out create mode 100644 regression-test/data/variant_github_events_p0/howHasTheListOfTopRepositoriesChangedOverTheYears4.out create mode 100644 regression-test/data/variant_github_events_p0/howHasTheListOfTopRepositoriesChangedOverTheYears5.out create mode 100644 regression-test/data/variant_github_events_p0/howHasTheListOfTopRepositoriesChangedOverTheYears6.out create mode 100644 regression-test/data/variant_github_events_p0/howHasTheTotalNumberOfStarsChangedOverTime.out create mode 100644 regression-test/data/variant_github_events_p0/issuesWithTheMostComments1.out create mode 100644 regression-test/data/variant_github_events_p0/issuesWithTheMostComments2.out create mode 100644 regression-test/data/variant_github_events_p0/issuesWithTheMostComments3.out create mode 100644 regression-test/data/variant_github_events_p0/issuesWithTheMostComments4.out create mode 100644 regression-test/data/variant_github_events_p0/issuesWithTheMostComments5.out create mode 100644 regression-test/data/variant_github_events_p0/issuesWithTheMostComments6.out create mode 100644 regression-test/data/variant_github_events_p0/issuesWithTheMostComments7.out create mode 100644 regression-test/data/variant_github_events_p0/load.out create mode 100644 regression-test/data/variant_github_events_p0/mostForkedRepositories.out create mode 100644 regression-test/data/variant_github_events_p0/mostPopularCommentsOnGithub.out create mode 100644 
regression-test/data/variant_github_events_p0/organizationsByTheNumberOfRepositories.out create mode 100644 regression-test/data/variant_github_events_p0/organizationsByTheNumberOfStars.out create mode 100644 regression-test/data/variant_github_events_p0/proportionsBetweenStarsAndForks1.out create mode 100644 regression-test/data/variant_github_events_p0/proportionsBetweenStarsAndForks2.out create mode 100644 regression-test/data/variant_github_events_p0/proportionsBetweenStarsAndForks3.out create mode 100644 regression-test/data/variant_github_events_p0/proportionsBetweenStarsAndForks4.out create mode 100644 regression-test/data/variant_github_events_p0/proportionsBetweenStarsAndForks5.out create mode 100644 regression-test/data/variant_github_events_p0/repositoriesByAmountOfModifiedCode.out create mode 100644 regression-test/data/variant_github_events_p0/repositoriesByTheNumberOfPushes.out create mode 100644 regression-test/data/variant_github_events_p0/repositoriesWithClickhouse_related_comments1.out create mode 100644 regression-test/data/variant_github_events_p0/repositoriesWithClickhouse_related_comments2.out create mode 100644 regression-test/data/variant_github_events_p0/repositoriesWithDoris_related_comments1.out create mode 100644 regression-test/data/variant_github_events_p0/repositoriesWithDoris_related_comments2.out create mode 100644 regression-test/data/variant_github_events_p0/repositoriesWithTheHighestGrowthYoY.out create mode 100644 regression-test/data/variant_github_events_p0/repositoriesWithTheMaximumAmountOfIssues1.out create mode 100644 regression-test/data/variant_github_events_p0/repositoriesWithTheMaximumAmountOfIssues2.out create mode 100644 regression-test/data/variant_github_events_p0/repositoriesWithTheMaximumAmountOfIssues3.out create mode 100644 regression-test/data/variant_github_events_p0/repositoriesWithTheMaximumAmountOfIssues4.out create mode 100644 
regression-test/data/variant_github_events_p0/repositoriesWithTheMaximumAmountOfPullRequests1.out create mode 100644 regression-test/data/variant_github_events_p0/repositoriesWithTheMaximumAmountOfPullRequests2.out create mode 100644 regression-test/data/variant_github_events_p0/repositoriesWithTheMaximumNumberOfAcceptedInvitations.out create mode 100644 regression-test/data/variant_github_events_p0/repositoriesWithTheMostPeopleWhoHavePushAccess1.out create mode 100644 regression-test/data/variant_github_events_p0/repositoriesWithTheMostStarsOverOneDay1.out create mode 100644 regression-test/data/variant_github_events_p0/repositoriesWithTheMostSteadyGrowthOverTime.out create mode 100644 regression-test/data/variant_github_events_p0/repositoriesWithTheWorstStagnation_order.out create mode 100644 regression-test/data/variant_github_events_p0/repositoryAffinityList1.out create mode 100644 regression-test/data/variant_github_events_p0/repositoryAffinityList2.out create mode 100644 regression-test/data/variant_github_events_p0/starsFromHeavyGithubUsers1.out create mode 100644 regression-test/data/variant_github_events_p0/starsFromHeavyGithubUsers2.out create mode 100644 regression-test/data/variant_github_events_p0/theLongestRepositoryNames1.out create mode 100644 regression-test/data/variant_github_events_p0/theLongestRepositoryNames2.out create mode 100644 regression-test/data/variant_github_events_p0/theMostToughCodeReviews.out create mode 100644 regression-test/data/variant_github_events_p0/theTotalNumberOfRepositoriesOnGithub.out create mode 100644 regression-test/data/variant_github_events_p0/theTotalNumberOfUsersOnGithub1.out create mode 100644 regression-test/data/variant_github_events_p0/theTotalNumberOfUsersOnGithub2.out create mode 100644 regression-test/data/variant_github_events_p0/theTotalNumberOfUsersOnGithub3.out create mode 100644 regression-test/data/variant_github_events_p0/theTotalNumberOfUsersOnGithub4.out create mode 100644 
regression-test/data/variant_github_events_p0/topRepositoriesByStars.out create mode 100644 regression-test/data/variant_github_events_p0/whatIsTheBestDayOfTheWeekToCatchAStar.out create mode 100644 regression-test/data/variant_github_events_p0/whoAreAllThosePeopleGivingStars1.out create mode 100644 regression-test/data/variant_github_events_p0/whoAreAllThosePeopleGivingStars2.out create mode 100644 regression-test/data/variant_github_events_p0/whoAreAllThosePeopleGivingStars3.out create mode 100644 regression-test/data/variant_p0/column_name.out create mode 100644 regression-test/data/variant_p0/complexjson.out create mode 100644 regression-test/data/variant_p0/delete.json create mode 100644 regression-test/data/variant_p0/delete_update.out create mode 100644 regression-test/data/variant_p0/insert_into_select.out create mode 100644 regression-test/data/variant_p0/load.out create mode 100644 regression-test/data/variant_p0/multi_var.out create mode 100644 regression-test/data/variant_p0/schema_change.out create mode 100644 regression-test/data/variant_p0/sql/gh_data.out create mode 100644 regression-test/data/variant_p0/with_index/load.out create mode 100644 regression-test/data/variant_p0/with_index/var_index.out create mode 100644 regression-test/data/variant_p2/load.out create mode 100644 regression-test/data/variant_p2/sql/authorsWithTheMostPushes.out create mode 100644 regression-test/data/variant_p2/sql/countingStar1.out create mode 100644 regression-test/data/variant_p2/sql/countingStar2.out create mode 100644 regression-test/data/variant_p2/sql/countingStar3.out create mode 100644 regression-test/data/variant_p2/sql/distributionOfRepositoriesByStarCount.out create mode 100644 regression-test/data/variant_p2/sql/githubRoulette.out create mode 100644 regression-test/data/variant_p2/sql/howHasTheListOfTopRepositoriesChangedOverTheYears1.out create mode 100644 regression-test/data/variant_p2/sql/howHasTheListOfTopRepositoriesChangedOverTheYears2.out create mode 
100644 regression-test/data/variant_p2/sql/howHasTheListOfTopRepositoriesChangedOverTheYears3.out create mode 100644 regression-test/data/variant_p2/sql/howHasTheListOfTopRepositoriesChangedOverTheYears4.out create mode 100644 regression-test/data/variant_p2/sql/howHasTheListOfTopRepositoriesChangedOverTheYears5.out create mode 100644 regression-test/data/variant_p2/sql/howHasTheListOfTopRepositoriesChangedOverTheYears6.out create mode 100644 regression-test/data/variant_p2/sql/howHasTheListOfTopRepositoriesChangedOverTheYears7.out create mode 100644 regression-test/data/variant_p2/sql/howHasTheTotalNumberOfStarsChangedOverTime.out create mode 100644 regression-test/data/variant_p2/sql/issuesWithTheMostComments1.out create mode 100644 regression-test/data/variant_p2/sql/issuesWithTheMostComments2.out create mode 100644 regression-test/data/variant_p2/sql/issuesWithTheMostComments3.out create mode 100644 regression-test/data/variant_p2/sql/issuesWithTheMostComments4.out create mode 100644 regression-test/data/variant_p2/sql/issuesWithTheMostComments5.out create mode 100644 regression-test/data/variant_p2/sql/issuesWithTheMostComments6.out create mode 100644 regression-test/data/variant_p2/sql/issuesWithTheMostComments7.out create mode 100644 regression-test/data/variant_p2/sql/issuesWithTheMostComments8.out create mode 100644 regression-test/data/variant_p2/sql/mostForkedRepositories.out create mode 100644 regression-test/data/variant_p2/sql/mostPopularCommentsOnGithub.out create mode 100644 regression-test/data/variant_p2/sql/organizationsByTheNumberOfRepositories.out create mode 100644 regression-test/data/variant_p2/sql/organizationsByTheNumberOfStars.out create mode 100644 regression-test/data/variant_p2/sql/organizationsByTheSizeOfCommunity.out create mode 100644 regression-test/data/variant_p2/sql/proportionsBetweenStarsAndForks1.out create mode 100644 regression-test/data/variant_p2/sql/proportionsBetweenStarsAndForks2.out create mode 100644 
regression-test/data/variant_p2/sql/proportionsBetweenStarsAndForks3.out create mode 100644 regression-test/data/variant_p2/sql/proportionsBetweenStarsAndForks4.out create mode 100644 regression-test/data/variant_p2/sql/proportionsBetweenStarsAndForks5.out create mode 100644 regression-test/data/variant_p2/sql/repositoriesWithClickhouse_related_comments1.out create mode 100644 regression-test/data/variant_p2/sql/repositoriesWithClickhouse_related_comments2.out create mode 100644 regression-test/data/variant_p2/sql/repositoriesWithDoris_related_comments1.out create mode 100644 regression-test/data/variant_p2/sql/repositoriesWithDoris_related_comments2.out create mode 100644 regression-test/data/variant_p2/sql/repositoriesWithTheHighestGrowthYoY.out create mode 100644 regression-test/data/variant_p2/sql/repositoriesWithTheMaximumAmountOfIssues1.out create mode 100644 regression-test/data/variant_p2/sql/repositoriesWithTheMaximumAmountOfIssues2.out create mode 100644 regression-test/data/variant_p2/sql/repositoriesWithTheMaximumAmountOfIssues3.out create mode 100644 regression-test/data/variant_p2/sql/repositoriesWithTheMaximumAmountOfIssues4.out create mode 100644 regression-test/data/variant_p2/sql/repositoriesWithTheMaximumAmountOfPullRequests1.out create mode 100644 regression-test/data/variant_p2/sql/repositoriesWithTheMaximumAmountOfPullRequests2.out create mode 100644 regression-test/data/variant_p2/sql/repositoriesWithTheMaximumNumberOfAcceptedInvitations.out create mode 100644 regression-test/data/variant_p2/sql/repositoriesWithTheMostPeopleWhoHavePushAccess1.out create mode 100644 regression-test/data/variant_p2/sql/repositoriesWithTheMostPeopleWhoHavePushAccess2.out create mode 100644 regression-test/data/variant_p2/sql/repositoriesWithTheMostPeopleWhoHavePushAccess3.out create mode 100644 regression-test/data/variant_p2/sql/repositoriesWithTheMostStarsOverOneDay1.out create mode 100644 
regression-test/data/variant_p2/sql/repositoriesWithTheMostStarsOverOneDay2.out create mode 100644 regression-test/data/variant_p2/sql/repositoriesWithTheMostStarsOverOneDay3.out create mode 100644 regression-test/data/variant_p2/sql/repositoriesWithTheMostSteadyGrowthOverTime.out create mode 100644 regression-test/data/variant_p2/sql/repositoriesWithTheWorstStagnation_order.out create mode 100644 regression-test/data/variant_p2/sql/repositoryAffinityList2.out create mode 100644 regression-test/data/variant_p2/sql/sql01.out create mode 100644 regression-test/data/variant_p2/sql/sql02.out create mode 100644 regression-test/data/variant_p2/sql/sql03.out create mode 100644 regression-test/data/variant_p2/sql/sql04.out create mode 100644 regression-test/data/variant_p2/sql/sql05.out create mode 100644 regression-test/data/variant_p2/sql/sql06.out create mode 100644 regression-test/data/variant_p2/sql/sql07.out create mode 100644 regression-test/data/variant_p2/sql/sql08.out create mode 100644 regression-test/data/variant_p2/sql/theLongestRepositoryNames1.out create mode 100644 regression-test/data/variant_p2/sql/theLongestRepositoryNames2.out create mode 100644 regression-test/data/variant_p2/sql/theMostToughCodeReviews.out create mode 100644 regression-test/data/variant_p2/sql/theTotalNumberOfRepositoriesOnGithub.out create mode 100644 regression-test/data/variant_p2/sql/theTotalNumberOfUsersOnGithub1.out create mode 100644 regression-test/data/variant_p2/sql/theTotalNumberOfUsersOnGithub2.out create mode 100644 regression-test/data/variant_p2/sql/theTotalNumberOfUsersOnGithub3.out create mode 100644 regression-test/data/variant_p2/sql/theTotalNumberOfUsersOnGithub4.out create mode 100644 regression-test/data/variant_p2/sql/topRepositoriesByStars.out create mode 100644 regression-test/data/variant_p2/sql/whatIsTheBestDayOfTheWeekToCatchAStar.out create mode 100644 regression-test/data/variant_p2/sql/whoAreAllThosePeopleGivingStars1.out create mode 100644 
regression-test/data/variant_p2/sql/whoAreAllThosePeopleGivingStars2.out create mode 100644 regression-test/suites/variant_github_events_p0/affinityByIssuesAndPRs1.sql create mode 100644 regression-test/suites/variant_github_events_p0/affinityByIssuesAndPRs2.sql create mode 100644 regression-test/suites/variant_github_events_p0/authorsWithTheMostPushes.sql create mode 100644 regression-test/suites/variant_github_events_p0/countingStar1.sql create mode 100644 regression-test/suites/variant_github_events_p0/countingStar2.sql create mode 100644 regression-test/suites/variant_github_events_p0/countingStar3.sql create mode 100644 regression-test/suites/variant_github_events_p0/distributionOfRepositoriesByStarCount.sql create mode 100644 regression-test/suites/variant_github_events_p0/githubRoulette.sql create mode 100644 regression-test/suites/variant_github_events_p0/howHasTheListOfTopRepositoriesChangedOverTheYears1.sql create mode 100644 regression-test/suites/variant_github_events_p0/howHasTheListOfTopRepositoriesChangedOverTheYears2.sql create mode 100644 regression-test/suites/variant_github_events_p0/howHasTheListOfTopRepositoriesChangedOverTheYears3.sql create mode 100644 regression-test/suites/variant_github_events_p0/howHasTheListOfTopRepositoriesChangedOverTheYears4.sql create mode 100644 regression-test/suites/variant_github_events_p0/howHasTheListOfTopRepositoriesChangedOverTheYears5.sql create mode 100644 regression-test/suites/variant_github_events_p0/howHasTheListOfTopRepositoriesChangedOverTheYears6.sql create mode 100644 regression-test/suites/variant_github_events_p0/howHasTheListOfTopRepositoriesChangedOverTheYears7.sql create mode 100644 regression-test/suites/variant_github_events_p0/howHasTheTotalNumberOfStarsChangedOverTime.sql create mode 100644 regression-test/suites/variant_github_events_p0/issuesWithTheMostComments1.sql create mode 100644 regression-test/suites/variant_github_events_p0/issuesWithTheMostComments2.sql create mode 100644 
regression-test/suites/variant_github_events_p0/issuesWithTheMostComments3.sql create mode 100644 regression-test/suites/variant_github_events_p0/issuesWithTheMostComments4.sql create mode 100644 regression-test/suites/variant_github_events_p0/issuesWithTheMostComments5.sql create mode 100644 regression-test/suites/variant_github_events_p0/issuesWithTheMostComments6.sql create mode 100644 regression-test/suites/variant_github_events_p0/issuesWithTheMostComments7.sql create mode 100644 regression-test/suites/variant_github_events_p0/issuesWithTheMostComments8.sql create mode 100644 regression-test/suites/variant_github_events_p0/load.groovy create mode 100644 regression-test/suites/variant_github_events_p0/mostForkedRepositories.sql create mode 100644 regression-test/suites/variant_github_events_p0/mostPopularCommentsOnGithub.sql create mode 100644 regression-test/suites/variant_github_events_p0/organizationsByTheNumberOfRepositories.sql create mode 100644 regression-test/suites/variant_github_events_p0/organizationsByTheNumberOfStars.sql create mode 100644 regression-test/suites/variant_github_events_p0/organizationsByTheSizeOfCommunity.sql create mode 100644 regression-test/suites/variant_github_events_p0/proportionsBetweenStarsAndForks1.sql create mode 100644 regression-test/suites/variant_github_events_p0/proportionsBetweenStarsAndForks2.sql create mode 100644 regression-test/suites/variant_github_events_p0/proportionsBetweenStarsAndForks3.sql create mode 100644 regression-test/suites/variant_github_events_p0/proportionsBetweenStarsAndForks4.sql create mode 100644 regression-test/suites/variant_github_events_p0/proportionsBetweenStarsAndForks5.sql create mode 100644 regression-test/suites/variant_github_events_p0/repositoriesByAmountOfModifiedCode.sql create mode 100644 regression-test/suites/variant_github_events_p0/repositoriesByTheNumberOfPushes.sql create mode 100644 
regression-test/suites/variant_github_events_p0/repositoriesWithClickhouse_related_comments1.sql create mode 100644 regression-test/suites/variant_github_events_p0/repositoriesWithClickhouse_related_comments2.sql create mode 100644 regression-test/suites/variant_github_events_p0/repositoriesWithDoris_related_comments1.sql create mode 100644 regression-test/suites/variant_github_events_p0/repositoriesWithDoris_related_comments2.sql create mode 100644 regression-test/suites/variant_github_events_p0/repositoriesWithTheHighestGrowthYoY.sql create mode 100644 regression-test/suites/variant_github_events_p0/repositoriesWithTheMaximumAmountOfIssues1.sql create mode 100644 regression-test/suites/variant_github_events_p0/repositoriesWithTheMaximumAmountOfIssues2.sql create mode 100644 regression-test/suites/variant_github_events_p0/repositoriesWithTheMaximumAmountOfIssues3.sql create mode 100644 regression-test/suites/variant_github_events_p0/repositoriesWithTheMaximumAmountOfIssues4.sql create mode 100644 regression-test/suites/variant_github_events_p0/repositoriesWithTheMaximumAmountOfPullRequests1.sql create mode 100644 regression-test/suites/variant_github_events_p0/repositoriesWithTheMaximumAmountOfPullRequests2.sql create mode 100644 regression-test/suites/variant_github_events_p0/repositoriesWithTheMaximumNumberOfAcceptedInvitations.sql create mode 100644 regression-test/suites/variant_github_events_p0/repositoriesWithTheMostPeopleWhoHavePushAccess1.sql create mode 100644 regression-test/suites/variant_github_events_p0/repositoriesWithTheMostPeopleWhoHavePushAccess2.sql create mode 100644 regression-test/suites/variant_github_events_p0/repositoriesWithTheMostPeopleWhoHavePushAccess3.sql create mode 100644 regression-test/suites/variant_github_events_p0/repositoriesWithTheMostStarsOverOneDay1.sql create mode 100644 regression-test/suites/variant_github_events_p0/repositoriesWithTheMostStarsOverOneDay2.sql create mode 100644 
regression-test/suites/variant_github_events_p0/repositoriesWithTheMostStarsOverOneDay3.sql create mode 100644 regression-test/suites/variant_github_events_p0/repositoriesWithTheMostSteadyGrowthOverTime.sql create mode 100644 regression-test/suites/variant_github_events_p0/repositoriesWithTheWorstStagnation_order.sql create mode 100644 regression-test/suites/variant_github_events_p0/repositoryAffinityList1.sql create mode 100644 regression-test/suites/variant_github_events_p0/repositoryAffinityList2.sql create mode 100644 regression-test/suites/variant_github_events_p0/starsFromHeavyGithubUsers1.sql create mode 100644 regression-test/suites/variant_github_events_p0/starsFromHeavyGithubUsers2.sql create mode 100644 regression-test/suites/variant_github_events_p0/theLongestRepositoryNames1.sql create mode 100644 regression-test/suites/variant_github_events_p0/theLongestRepositoryNames2.sql create mode 100644 regression-test/suites/variant_github_events_p0/theMostToughCodeReviews.sql create mode 100644 regression-test/suites/variant_github_events_p0/theTotalNumberOfRepositoriesOnGithub.sql create mode 100644 regression-test/suites/variant_github_events_p0/theTotalNumberOfUsersOnGithub1.sql create mode 100644 regression-test/suites/variant_github_events_p0/theTotalNumberOfUsersOnGithub2.sql create mode 100644 regression-test/suites/variant_github_events_p0/theTotalNumberOfUsersOnGithub3.sql create mode 100644 regression-test/suites/variant_github_events_p0/theTotalNumberOfUsersOnGithub4.sql create mode 100644 regression-test/suites/variant_github_events_p0/topLabels1.sql create mode 100644 regression-test/suites/variant_github_events_p0/topLabels2.sql create mode 100644 regression-test/suites/variant_github_events_p0/topLabels3.sql create mode 100644 regression-test/suites/variant_github_events_p0/topRepositoriesByStars.sql create mode 100644 regression-test/suites/variant_github_events_p0/whatIsTheBestDayOfTheWeekToCatchAStar.sql create mode 100644 
regression-test/suites/variant_github_events_p0/whoAreAllThosePeopleGivingStars1.sql create mode 100644 regression-test/suites/variant_github_events_p0/whoAreAllThosePeopleGivingStars2.sql create mode 100644 regression-test/suites/variant_github_events_p0/whoAreAllThosePeopleGivingStars3.sql create mode 100644 regression-test/suites/variant_p0/column_name.groovy create mode 100644 regression-test/suites/variant_p0/complexjson.groovy create mode 100644 regression-test/suites/variant_p0/delete_update.groovy create mode 100644 regression-test/suites/variant_p0/insert_into_select.groovy create mode 100644 regression-test/suites/variant_p0/load.groovy create mode 100644 regression-test/suites/variant_p0/multi_var.groovy create mode 100644 regression-test/suites/variant_p0/schema_change.groovy create mode 100644 regression-test/suites/variant_p0/sql/gh_data.sql create mode 100644 regression-test/suites/variant_p0/with_index/load.groovy create mode 100644 regression-test/suites/variant_p0/with_index/var_index.groovy create mode 100644 regression-test/suites/variant_p2/github_events_advance.groovy create mode 100644 regression-test/suites/variant_p2/sql/authorsWithTheMostPushes.sql create mode 100644 regression-test/suites/variant_p2/sql/countingStar1.sql create mode 100644 regression-test/suites/variant_p2/sql/countingStar2.sql create mode 100644 regression-test/suites/variant_p2/sql/countingStar3.sql create mode 100644 regression-test/suites/variant_p2/sql/distributionOfRepositoriesByStarCount.sql create mode 100644 regression-test/suites/variant_p2/sql/githubRoulette.sql create mode 100644 regression-test/suites/variant_p2/sql/howHasTheListOfTopRepositoriesChangedOverTheYears1.sql create mode 100644 regression-test/suites/variant_p2/sql/howHasTheListOfTopRepositoriesChangedOverTheYears2.sql create mode 100644 regression-test/suites/variant_p2/sql/howHasTheListOfTopRepositoriesChangedOverTheYears3.sql create mode 100644 
regression-test/suites/variant_p2/sql/howHasTheListOfTopRepositoriesChangedOverTheYears4.sql create mode 100644 regression-test/suites/variant_p2/sql/howHasTheListOfTopRepositoriesChangedOverTheYears5.sql create mode 100644 regression-test/suites/variant_p2/sql/howHasTheListOfTopRepositoriesChangedOverTheYears6.sql create mode 100644 regression-test/suites/variant_p2/sql/howHasTheListOfTopRepositoriesChangedOverTheYears7.sql create mode 100644 regression-test/suites/variant_p2/sql/howHasTheTotalNumberOfStarsChangedOverTime.sql create mode 100644 regression-test/suites/variant_p2/sql/issuesWithTheMostComments1.sql create mode 100644 regression-test/suites/variant_p2/sql/issuesWithTheMostComments2.sql create mode 100644 regression-test/suites/variant_p2/sql/issuesWithTheMostComments3.sql create mode 100644 regression-test/suites/variant_p2/sql/issuesWithTheMostComments4.sql create mode 100644 regression-test/suites/variant_p2/sql/issuesWithTheMostComments5.sql create mode 100644 regression-test/suites/variant_p2/sql/issuesWithTheMostComments6.sql create mode 100644 regression-test/suites/variant_p2/sql/issuesWithTheMostComments7.sql create mode 100644 regression-test/suites/variant_p2/sql/issuesWithTheMostComments8.sql create mode 100644 regression-test/suites/variant_p2/sql/mostForkedRepositories.sql create mode 100644 regression-test/suites/variant_p2/sql/mostPopularCommentsOnGithub.sql create mode 100644 regression-test/suites/variant_p2/sql/organizationsByTheNumberOfRepositories.sql create mode 100644 regression-test/suites/variant_p2/sql/organizationsByTheNumberOfStars.sql create mode 100644 regression-test/suites/variant_p2/sql/organizationsByTheSizeOfCommunity.sql create mode 100644 regression-test/suites/variant_p2/sql/proportionsBetweenStarsAndForks1.sql create mode 100644 regression-test/suites/variant_p2/sql/proportionsBetweenStarsAndForks2.sql create mode 100644 regression-test/suites/variant_p2/sql/proportionsBetweenStarsAndForks3.sql create mode 100644 
regression-test/suites/variant_p2/sql/proportionsBetweenStarsAndForks4.sql create mode 100644 regression-test/suites/variant_p2/sql/proportionsBetweenStarsAndForks5.sql create mode 100644 regression-test/suites/variant_p2/sql/repositoriesWithClickhouse_related_comments1.sql create mode 100644 regression-test/suites/variant_p2/sql/repositoriesWithClickhouse_related_comments2.sql create mode 100644 regression-test/suites/variant_p2/sql/repositoriesWithDoris_related_comments1.sql create mode 100644 regression-test/suites/variant_p2/sql/repositoriesWithDoris_related_comments2.sql create mode 100644 regression-test/suites/variant_p2/sql/repositoriesWithTheHighestGrowthYoY.sql create mode 100644 regression-test/suites/variant_p2/sql/repositoriesWithTheMaximumAmountOfIssues1.sql create mode 100644 regression-test/suites/variant_p2/sql/repositoriesWithTheMaximumAmountOfIssues2.sql create mode 100644 regression-test/suites/variant_p2/sql/repositoriesWithTheMaximumAmountOfIssues3.sql create mode 100644 regression-test/suites/variant_p2/sql/repositoriesWithTheMaximumAmountOfIssues4.sql create mode 100644 regression-test/suites/variant_p2/sql/repositoriesWithTheMaximumAmountOfPullRequests1.sql create mode 100644 regression-test/suites/variant_p2/sql/repositoriesWithTheMaximumAmountOfPullRequests2.sql create mode 100644 regression-test/suites/variant_p2/sql/repositoriesWithTheMaximumNumberOfAcceptedInvitations.sql create mode 100644 regression-test/suites/variant_p2/sql/repositoriesWithTheMostPeopleWhoHavePushAccess1.sql create mode 100644 regression-test/suites/variant_p2/sql/repositoriesWithTheMostPeopleWhoHavePushAccess2.sql create mode 100644 regression-test/suites/variant_p2/sql/repositoriesWithTheMostPeopleWhoHavePushAccess3.sql create mode 100644 regression-test/suites/variant_p2/sql/repositoriesWithTheMostStarsOverOneDay1.sql create mode 100644 regression-test/suites/variant_p2/sql/repositoriesWithTheMostStarsOverOneDay2.sql create mode 100644 
regression-test/suites/variant_p2/sql/repositoriesWithTheMostStarsOverOneDay3.sql create mode 100644 regression-test/suites/variant_p2/sql/repositoriesWithTheMostSteadyGrowthOverTime.sql create mode 100644 regression-test/suites/variant_p2/sql/repositoriesWithTheWorstStagnation_order.sql create mode 100644 regression-test/suites/variant_p2/sql/repositoryAffinityList2.sql create mode 100644 regression-test/suites/variant_p2/sql/sql01.sql create mode 100644 regression-test/suites/variant_p2/sql/sql02.sql create mode 100644 regression-test/suites/variant_p2/sql/sql03.sql create mode 100644 regression-test/suites/variant_p2/sql/sql04.sql create mode 100644 regression-test/suites/variant_p2/sql/sql05.sql create mode 100644 regression-test/suites/variant_p2/sql/sql06.sql create mode 100644 regression-test/suites/variant_p2/sql/sql07.sql create mode 100644 regression-test/suites/variant_p2/sql/sql08.sql create mode 100644 regression-test/suites/variant_p2/sql/theLongestRepositoryNames1.sql create mode 100644 regression-test/suites/variant_p2/sql/theLongestRepositoryNames2.sql create mode 100644 regression-test/suites/variant_p2/sql/theMostToughCodeReviews.sql create mode 100644 regression-test/suites/variant_p2/sql/theTotalNumberOfRepositoriesOnGithub.sql create mode 100644 regression-test/suites/variant_p2/sql/theTotalNumberOfUsersOnGithub1.sql create mode 100644 regression-test/suites/variant_p2/sql/theTotalNumberOfUsersOnGithub2.sql create mode 100644 regression-test/suites/variant_p2/sql/theTotalNumberOfUsersOnGithub3.sql create mode 100644 regression-test/suites/variant_p2/sql/theTotalNumberOfUsersOnGithub4.sql create mode 100644 regression-test/suites/variant_p2/sql/topLabels1.sql create mode 100644 regression-test/suites/variant_p2/sql/topLabels2.sql create mode 100644 regression-test/suites/variant_p2/sql/topLabels3.sql create mode 100644 regression-test/suites/variant_p2/sql/topRepositoriesByStars.sql create mode 100644 
regression-test/suites/variant_p2/sql/whatIsTheBestDayOfTheWeekToCatchAStar.sql create mode 100644 regression-test/suites/variant_p2/sql/whoAreAllThosePeopleGivingStars1.sql create mode 100644 regression-test/suites/variant_p2/sql/whoAreAllThosePeopleGivingStars2.sql create mode 100644 regression-test/suites/variant_p2/unresovled_sql/affinityByIssuesAndPRs1.sql create mode 100644 regression-test/suites/variant_p2/unresovled_sql/affinityByIssuesAndPRs2.sql create mode 100644 regression-test/suites/variant_p2/unresovled_sql/repositoriesByAmountOfModifiedCode.sql create mode 100644 regression-test/suites/variant_p2/unresovled_sql/repositoriesByTheNumberOfPushes.sql create mode 100644 regression-test/suites/variant_p2/unresovled_sql/repositoryAffinityList1.sql create mode 100644 regression-test/suites/variant_p2/unresovled_sql/starsFromHeavyGithubUsers1.sql create mode 100644 regression-test/suites/variant_p2/unresovled_sql/starsFromHeavyGithubUsers2.sql create mode 100644 regression-test/suites/variant_p2/unresovled_sql/whoAreAllThosePeopleGivingStars3.sql diff --git a/be/src/common/config.cpp b/be/src/common/config.cpp index 194d64aee9..3418a49d45 100644 --- a/be/src/common/config.cpp +++ b/be/src/common/config.cpp @@ -955,8 +955,9 @@ DEFINE_Bool(enable_workload_group_for_scan, "false"); // Will remove after fully test. 
DEFINE_Bool(enable_index_apply_preds_except_leafnode_of_andnode, "true"); -DEFINE_mBool(enable_flatten_nested_for_variant, "false"); -DEFINE_mDouble(ratio_of_defaults_as_sparse_column, "0.95"); +DEFINE_mBool(variant_enable_flatten_nested, "false"); +DEFINE_mDouble(variant_ratio_of_defaults_as_sparse_column, "0.95"); +DEFINE_mInt64(variant_threshold_rows_to_estimate_sparse_column, "1000"); // block file cache DEFINE_Bool(enable_file_cache, "false"); diff --git a/be/src/common/config.h b/be/src/common/config.h index 1f6634d8da..0826db45e2 100644 --- a/be/src/common/config.h +++ b/be/src/common/config.h @@ -1124,10 +1124,13 @@ DECLARE_mInt64(lookup_connection_cache_bytes_limit); DECLARE_mInt64(LZ4_HC_compression_level); // Whether flatten nested arrays in variant column // Notice: TEST ONLY -DECLARE_mBool(enable_flatten_nested_for_variant); +DECLARE_mBool(variant_enable_flatten_nested); // Threshold of a column as sparse column // Notice: TEST ONLY -DECLARE_mDouble(ratio_of_defaults_as_sparse_column); +DECLARE_mDouble(variant_ratio_of_defaults_as_sparse_column); +// Threshold to estimate a column is sparsed +// Notice: TEST ONLY +DECLARE_mInt64(variant_threshold_rows_to_estimate_sparse_column); DECLARE_mBool(enable_merge_on_write_correctness_check); // rowid conversion correctness check when compaction for mow table diff --git a/be/src/olap/accept_null_predicate.h b/be/src/olap/accept_null_predicate.h index 1a5f586ed5..90cff5cc70 100644 --- a/be/src/olap/accept_null_predicate.h +++ b/be/src/olap/accept_null_predicate.h @@ -49,9 +49,14 @@ public: return _nested->evaluate(iterator, num_rows, roaring); } - Status evaluate(const Schema& schema, InvertedIndexIterator* iterator, uint32_t num_rows, + Status evaluate(const vectorized::NameAndTypePair& name_with_type, + InvertedIndexIterator* iterator, uint32_t num_rows, roaring::Roaring* bitmap) const override { - return _nested->evaluate(schema, iterator, num_rows, bitmap); + return _nested->evaluate(name_with_type, 
iterator, num_rows, bitmap); + } + + bool can_do_apply_safely(PrimitiveType input_type, bool is_null) const override { + return _nested->can_do_apply_safely(input_type, is_null); } uint16_t evaluate(const vectorized::IColumn& column, uint16_t* sel, diff --git a/be/src/olap/base_tablet.cpp b/be/src/olap/base_tablet.cpp index d9d4a4c537..db17e68706 100644 --- a/be/src/olap/base_tablet.cpp +++ b/be/src/olap/base_tablet.cpp @@ -68,10 +68,9 @@ void BaseTablet::update_max_version_schema(const TabletSchemaSPtr& tablet_schema void BaseTablet::update_by_least_common_schema(const TabletSchemaSPtr& update_schema) { std::lock_guard wrlock(_meta_lock); - auto final_schema = std::make_shared(); CHECK(_max_version_schema->schema_version() >= update_schema->schema_version()); - vectorized::schema_util::get_least_common_schema({_max_version_schema, update_schema}, - final_schema); + auto final_schema = vectorized::schema_util::get_least_common_schema( + {_max_version_schema, update_schema}, _max_version_schema); _max_version_schema = final_schema; VLOG_DEBUG << "dump updated tablet schema: " << final_schema->dump_structure(); } diff --git a/be/src/olap/bitmap_filter_predicate.h b/be/src/olap/bitmap_filter_predicate.h index de03a1bc61..7420356a33 100644 --- a/be/src/olap/bitmap_filter_predicate.h +++ b/be/src/olap/bitmap_filter_predicate.h @@ -48,6 +48,10 @@ public: PredicateType type() const override { return PredicateType::BITMAP_FILTER; } + bool can_do_apply_safely(PrimitiveType input_type, bool is_null) const override { + return input_type == T || (is_string_type(input_type) && is_string_type(T)); + } + bool evaluate_and(const std::pair& statistic) const override { if (_specific_filter->is_not_in()) { return true; diff --git a/be/src/olap/block_column_predicate.h b/be/src/olap/block_column_predicate.h index f70da43f56..cde2a5680a 100644 --- a/be/src/olap/block_column_predicate.h +++ b/be/src/olap/block_column_predicate.h @@ -72,6 +72,11 @@ public: virtual void 
evaluate_vec(vectorized::MutableColumns& block, uint16_t size, bool* flags) const { } + virtual bool can_do_apply_safely(PrimitiveType input_type, bool is_null) const { + LOG(FATAL) << "should not reach here"; + return true; + } + virtual bool evaluate_and(const std::pair& statistic) const { LOG(FATAL) << "should not reach here"; return true; @@ -125,6 +130,10 @@ public: return _predicate->can_do_bloom_filter(ngram); } + bool can_do_apply_safely(PrimitiveType input_type, bool is_null) const override { + return _predicate->can_do_apply_safely(input_type, is_null); + } + private: const ColumnPredicate* _predicate = nullptr; }; @@ -199,6 +208,15 @@ public: return true; } + bool can_do_apply_safely(PrimitiveType input_type, bool is_null) const override { + for (auto& pred : _block_column_predicate_vec) { + if (!pred->can_do_apply_safely(input_type, is_null)) { + return false; + } + } + return true; + } + Status evaluate(const std::string& column_name, InvertedIndexIterator* iterator, uint32_t num_rows, roaring::Roaring* bitmap) const override; }; diff --git a/be/src/olap/bloom_filter_predicate.h b/be/src/olap/bloom_filter_predicate.h index 156f054a3f..51abd68a4b 100644 --- a/be/src/olap/bloom_filter_predicate.h +++ b/be/src/olap/bloom_filter_predicate.h @@ -55,6 +55,10 @@ public: uint16_t evaluate(const vectorized::IColumn& column, uint16_t* sel, uint16_t size) const override; + bool can_do_apply_safely(PrimitiveType input_type, bool is_null) const override { + return input_type == T || (is_string_type(input_type) && is_string_type(T)); + } + private: template uint16_t evaluate(const vectorized::IColumn& column, const uint8_t* null_map, uint16_t* sel, diff --git a/be/src/olap/column_predicate.h b/be/src/olap/column_predicate.h index 15b36f672e..42d9a3d58f 100644 --- a/be/src/olap/column_predicate.h +++ b/be/src/olap/column_predicate.h @@ -24,6 +24,7 @@ #include "olap/rowset/segment_v2/inverted_index_reader.h" #include "olap/schema.h" #include "olap/selection_vector.h" 
+#include "runtime/define_primitive_type.h" #include "vec/columns/column.h" using namespace doris::segment_v2; @@ -173,8 +174,9 @@ public: roaring::Roaring* roaring) const = 0; //evaluate predicate on inverted - virtual Status evaluate(const Schema& schema, InvertedIndexIterator* iterator, - uint32_t num_rows, roaring::Roaring* bitmap) const { + virtual Status evaluate(const vectorized::NameAndTypePair& name_with_type, + InvertedIndexIterator* iterator, uint32_t num_rows, + roaring::Roaring* bitmap) const { return Status::NotSupported( "Not Implemented evaluate with inverted index, please check the predicate"); } @@ -210,6 +212,11 @@ public: virtual bool can_do_bloom_filter(bool ngram) const { return false; } + // Check input type could apply safely. + // Note: Currenly ColumnPredicate is not include complex type, so use PrimitiveType + // is simple and intuitive + virtual bool can_do_apply_safely(PrimitiveType input_type, bool is_null) const = 0; + // used to evaluate pre read column in lazy materialization // now only support integer/float // a vectorized eval way diff --git a/be/src/olap/compaction.cpp b/be/src/olap/compaction.cpp index 47d607518e..b889a0fe70 100644 --- a/be/src/olap/compaction.cpp +++ b/be/src/olap/compaction.cpp @@ -455,7 +455,8 @@ Status Compaction::do_compaction_impl(int64_t permits) { &dest_index_files, &fs, &tablet_path, &trans_vec, &dest_segment_num_rows, this](int32_t column_uniq_id) { auto st = compact_column( - _cur_tablet_schema->get_inverted_index(column_uniq_id)->index_id(), + _cur_tablet_schema->get_inverted_index(column_uniq_id, "") + ->index_id(), src_segment_num, dest_segment_num, src_index_files, dest_index_files, fs, index_writer_path, tablet_path, trans_vec, dest_segment_num_rows); @@ -463,7 +464,7 @@ Status Compaction::do_compaction_impl(int64_t permits) { LOG(ERROR) << "failed to do index compaction" << ". tablet=" << _tablet->tablet_id() << ". column uniq id=" << column_uniq_id << ". 
index_id= " - << _cur_tablet_schema->get_inverted_index(column_uniq_id) + << _cur_tablet_schema->get_inverted_index(column_uniq_id, "") ->index_id(); } }); @@ -554,7 +555,7 @@ Status Compaction::construct_output_rowset_writer(RowsetWriterContext& ctx, bool auto fs = rowset->rowset_meta()->fs(); auto index_meta = - rowset->tablet_schema()->get_inverted_index(unique_id); + rowset->tablet_schema()->get_inverted_index(unique_id, ""); if (index_meta == nullptr) { LOG(WARNING) << "tablet[" << _tablet->tablet_id() << "] index_unique_id[" << unique_id @@ -565,7 +566,8 @@ Status Compaction::construct_output_rowset_writer(RowsetWriterContext& ctx, bool auto segment_file = rowset->segment_file_path(i); std::string inverted_index_src_file_path = InvertedIndexDescriptor::get_index_file_name( - segment_file, index_meta->index_id()); + segment_file, index_meta->index_id(), + index_meta->get_index_suffix()); bool exists = false; if (!fs->exists(inverted_index_src_file_path, &exists).ok()) { LOG(ERROR) diff --git a/be/src/olap/comparison_predicate.h b/be/src/olap/comparison_predicate.h index aed5787721..3505a94e27 100644 --- a/be/src/olap/comparison_predicate.h +++ b/be/src/olap/comparison_predicate.h @@ -47,6 +47,10 @@ public: *to = cloned; } + bool can_do_apply_safely(PrimitiveType input_type, bool is_null) const override { + return input_type == Type || (is_string_type(input_type) && is_string_type(Type)); + } + bool need_to_clone() const override { return true; } PredicateType type() const override { return PT; } @@ -76,13 +80,13 @@ public: bitmap); } - Status evaluate(const Schema& schema, InvertedIndexIterator* iterator, uint32_t num_rows, + Status evaluate(const vectorized::NameAndTypePair& name_with_type, + InvertedIndexIterator* iterator, uint32_t num_rows, roaring::Roaring* bitmap) const override { if (iterator == nullptr) { return Status::OK(); } - auto column_desc = schema.column(_column_id); - std::string column_name = column_desc->name(); + std::string column_name = 
name_with_type.first; InvertedIndexQueryType query_type = InvertedIndexQueryType::UNKNOWN_QUERY; switch (PT) { @@ -298,10 +302,12 @@ public: LOG(FATAL) << "column_dictionary must use StringRef predicate."; } } else { - auto* data_array = reinterpret_cast>&>(nested_column) - .get_data() - .data(); + auto* data_array = + vectorized::check_and_get_column< + const vectorized::PredicateColumnType>>( + nested_column) + ->get_data() + .data(); _base_loop_vec(size, flags, null_map.data(), data_array, _value); } diff --git a/be/src/olap/delta_writer.cpp b/be/src/olap/delta_writer.cpp index 68b97e49cb..9b80e4ec3e 100644 --- a/be/src/olap/delta_writer.cpp +++ b/be/src/olap/delta_writer.cpp @@ -51,6 +51,7 @@ #include "util/mem_info.h" #include "util/ref_count_closure.h" #include "util/stopwatch.hpp" +#include "util/time.h" #include "vec/core/block.h" namespace doris { @@ -230,14 +231,14 @@ void DeltaWriter::_request_slave_tablet_pull_rowset(PNodeInfo node_info) { _unfinished_slave_node.insert(node_info.id()); } - std::vector indices_ids; + std::vector> indices_ids; auto cur_rowset = _rowset_builder.rowset(); auto tablet_schema = cur_rowset->rowset_meta()->tablet_schema(); if (!tablet_schema->skip_write_index_on_load()) { for (auto& column : tablet_schema->columns()) { - const TabletIndex* index_meta = tablet_schema->get_inverted_index(column.unique_id()); + const TabletIndex* index_meta = tablet_schema->get_inverted_index(column); if (index_meta) { - indices_ids.emplace_back(index_meta->index_id()); + indices_ids.emplace_back(index_meta->index_id(), index_meta->get_index_suffix()); } } } @@ -260,11 +261,12 @@ void DeltaWriter::_request_slave_tablet_pull_rowset(PNodeInfo node_info) { if (!indices_ids.empty()) { for (auto index_id : indices_ids) { std::string inverted_index_file = InvertedIndexDescriptor::get_index_file_name( - tablet_path + "/" + segment_name.str(), index_id); + tablet_path + "/" + segment_name.str(), index_id.first, index_id.second); int64_t size = 
std::filesystem::file_size(inverted_index_file); PTabletWriteSlaveRequest::IndexSize index_size; - index_size.set_indexid(index_id); + index_size.set_indexid(index_id.first); index_size.set_size(size); + index_size.set_suffix_path(index_id.second); // Fetch the map value for the current segment_id. // If it doesn't exist, this will insert a new default-constructed IndexSizeMapValue auto& index_size_map_value = diff --git a/be/src/olap/delta_writer_v2.cpp b/be/src/olap/delta_writer_v2.cpp index 0a4108970a..6f6dd939a4 100644 --- a/be/src/olap/delta_writer_v2.cpp +++ b/be/src/olap/delta_writer_v2.cpp @@ -113,6 +113,7 @@ Status DeltaWriterV2::init() { context.rowset_state = PREPARED; context.segments_overlap = OVERLAPPING; context.tablet_schema = _tablet_schema; + context.original_tablet_schema = _tablet_schema; context.newest_write_timestamp = UnixSeconds(); context.tablet = nullptr; context.write_type = DataWriteType::TYPE_DIRECT; diff --git a/be/src/olap/field.h b/be/src/olap/field.h index 412ee2a76f..be95f1a0e3 100644 --- a/be/src/olap/field.h +++ b/be/src/olap/field.h @@ -47,7 +47,8 @@ public: _name(column.name()), _index_size(column.index_length()), _is_nullable(column.is_nullable()), - _unique_id(column.unique_id()) {} + _unique_id(column.unique_id()), + _path(column.path_info()) {} virtual ~Field() = default; @@ -57,6 +58,7 @@ public: size_t index_size() const { return _index_size; } int32_t unique_id() const { return _unique_id; } const std::string& name() const { return _name; } + const vectorized::PathInData& path() const { return _path; } virtual void set_to_max(char* buf) const { return _type_info->set_to_max(buf); } virtual void set_to_zone_map_max(char* buf) const { set_to_max(buf); } @@ -255,6 +257,7 @@ private: int32_t _precision; int32_t _scale; int32_t _unique_id; + vectorized::PathInData _path; }; class MapField : public Field { diff --git a/be/src/olap/in_list_predicate.h b/be/src/olap/in_list_predicate.h index 734c7d75bd..ce74fb1bd3 100644 --- 
a/be/src/olap/in_list_predicate.h +++ b/be/src/olap/in_list_predicate.h @@ -142,6 +142,10 @@ public: PredicateType type() const override { return PT; } + bool can_do_apply_safely(PrimitiveType input_type, bool is_null) const override { + return input_type == Type || (is_string_type(input_type) && is_string_type(Type)); + } + Status evaluate(BitmapIndexIterator* iterator, uint32_t num_rows, roaring::Roaring* result) const override { if (iterator == nullptr) { @@ -183,13 +187,13 @@ public: return Status::OK(); } - Status evaluate(const Schema& schema, InvertedIndexIterator* iterator, uint32_t num_rows, + Status evaluate(const vectorized::NameAndTypePair& name_with_type, + InvertedIndexIterator* iterator, uint32_t num_rows, roaring::Roaring* result) const override { if (iterator == nullptr) { return Status::OK(); } - auto column_desc = schema.column(_column_id); - std::string column_name = column_desc->name(); + std::string column_name = name_with_type.first; roaring::Roaring indices; HybridSetBase::IteratorBase* iter = _values->begin(); while (iter->has_next()) { diff --git a/be/src/olap/iterators.h b/be/src/olap/iterators.h index e2d1e67957..1f381ac4d7 100644 --- a/be/src/olap/iterators.h +++ b/be/src/olap/iterators.h @@ -39,6 +39,10 @@ namespace vectorized { struct IteratorRowRef; }; +namespace segment_v2 { +struct StreamReader; +} + class StorageReadOptions { public: struct KeyRange { @@ -109,6 +113,8 @@ public: RowsetId rowset_id; Version version; int32_t tablet_id = 0; + // slots that cast may be eliminated in storage layer + std::map target_cast_type_for_variants; }; class RowwiseIterator; diff --git a/be/src/olap/like_column_predicate.h b/be/src/olap/like_column_predicate.h index 7034efddd9..3a918d7605 100644 --- a/be/src/olap/like_column_predicate.h +++ b/be/src/olap/like_column_predicate.h @@ -62,6 +62,10 @@ public: return Status::OK(); } + bool can_do_apply_safely(PrimitiveType input_type, bool is_null) const override { + return input_type == T || 
(is_string_type(input_type) && is_string_type(T)); + } + uint16_t evaluate(const vectorized::IColumn& column, uint16_t* sel, uint16_t size) const override; diff --git a/be/src/olap/match_predicate.cpp b/be/src/olap/match_predicate.cpp index 561bc13551..c0ed7bc008 100644 --- a/be/src/olap/match_predicate.cpp +++ b/be/src/olap/match_predicate.cpp @@ -26,9 +26,15 @@ #include "olap/rowset/segment_v2/inverted_index_cache.h" #include "olap/rowset/segment_v2/inverted_index_reader.h" #include "olap/schema.h" +#include "olap/tablet_schema.h" #include "olap/types.h" #include "olap/utils.h" +#include "runtime/define_primitive_type.h" +#include "runtime/types.h" +#include "vec/common/assert_cast.h" #include "vec/common/string_ref.h" +#include "vec/data_types/data_type.h" +#include "vec/data_types/data_type_array.h" namespace doris { @@ -39,8 +45,9 @@ PredicateType MatchPredicate::type() const { return PredicateType::MATCH; } -Status MatchPredicate::evaluate(const Schema& schema, InvertedIndexIterator* iterator, - uint32_t num_rows, roaring::Roaring* bitmap) const { +Status MatchPredicate::evaluate(const vectorized::NameAndTypePair& name_with_type, + InvertedIndexIterator* iterator, uint32_t num_rows, + roaring::Roaring* bitmap) const { if (iterator == nullptr) { return Status::OK(); } @@ -48,25 +55,28 @@ Status MatchPredicate::evaluate(const Schema& schema, InvertedIndexIterator* ite return Status::Error( "match predicate evaluate skipped."); } - auto column_desc = schema.column(_column_id); + auto type = name_with_type.second; + const std::string& name = name_with_type.first; roaring::Roaring roaring; auto inverted_index_query_type = _to_inverted_index_query_type(_match_type); - - if (is_string_type(column_desc->type()) || - (column_desc->type() == FieldType::OLAP_FIELD_TYPE_ARRAY && - is_string_type(column_desc->get_sub_field(0)->type_info()->type()))) { + TypeDescriptor column_desc = type->get_type_as_type_descriptor(); + if (is_string_type(column_desc.type) || + 
(column_desc.type == TYPE_ARRAY && is_string_type(column_desc.children[0].type))) { StringRef match_value; int32_t length = _value.length(); char* buffer = const_cast(_value.c_str()); match_value.replace(buffer, length); //is it safe? RETURN_IF_ERROR(iterator->read_from_inverted_index( - column_desc->name(), &match_value, inverted_index_query_type, num_rows, &roaring)); - } else if (column_desc->type() == FieldType::OLAP_FIELD_TYPE_ARRAY && - is_numeric_type(column_desc->get_sub_field(0)->type_info()->type())) { - char buf[column_desc->get_sub_field(0)->type_info()->size()]; - RETURN_IF_ERROR(column_desc->get_sub_field(0)->from_string(buf, _value)); - RETURN_IF_ERROR(iterator->read_from_inverted_index( - column_desc->name(), buf, inverted_index_query_type, num_rows, &roaring, true)); + name, &match_value, inverted_index_query_type, num_rows, &roaring)); + } else if (column_desc.type == TYPE_ARRAY && + is_numeric_type( + TabletColumn::get_field_type_by_type(column_desc.children[0].type))) { + char buf[column_desc.children[0].len]; + const TypeInfo* type_info = get_scalar_type_info( + TabletColumn::get_field_type_by_type(column_desc.children[0].type)); + RETURN_IF_ERROR(type_info->from_string(buf, _value)); + RETURN_IF_ERROR(iterator->read_from_inverted_index(name, buf, inverted_index_query_type, + num_rows, &roaring, true)); } // mask out null_bitmap, since NULL cmp VALUE will produce NULL diff --git a/be/src/olap/match_predicate.h b/be/src/olap/match_predicate.h index 4232469f4a..915bfd445e 100644 --- a/be/src/olap/match_predicate.h +++ b/be/src/olap/match_predicate.h @@ -57,9 +57,14 @@ public: } //evaluate predicate on inverted - Status evaluate(const Schema& schema, InvertedIndexIterator* iterator, uint32_t num_rows, + Status evaluate(const vectorized::NameAndTypePair& name_with_type, + InvertedIndexIterator* iterator, uint32_t num_rows, roaring::Roaring* bitmap) const override; + bool can_do_apply_safely(PrimitiveType input_type, bool is_null) const override { + 
return is_string_type(input_type); + } + private: InvertedIndexQueryType _to_inverted_index_query_type(MatchType match_type) const; std::string _debug_string() const override { diff --git a/be/src/olap/null_predicate.cpp b/be/src/olap/null_predicate.cpp index ab1cf5e4f4..b2e767779b 100644 --- a/be/src/olap/null_predicate.cpp +++ b/be/src/olap/null_predicate.cpp @@ -53,8 +53,9 @@ Status NullPredicate::evaluate(BitmapIndexIterator* iterator, uint32_t num_rows, return Status::OK(); } -Status NullPredicate::evaluate(const Schema& schema, InvertedIndexIterator* iterator, - uint32_t num_rows, roaring::Roaring* bitmap) const { +Status NullPredicate::evaluate(const vectorized::NameAndTypePair& name_with_type, + InvertedIndexIterator* iterator, uint32_t num_rows, + roaring::Roaring* bitmap) const { // mask out null_bitmap, since NULL cmp VALUE will produce NULL // and be treated as false in WHERE InvertedIndexQueryCacheHandle null_bitmap_cache_handle; diff --git a/be/src/olap/null_predicate.h b/be/src/olap/null_predicate.h index 4313adea11..388d080317 100644 --- a/be/src/olap/null_predicate.h +++ b/be/src/olap/null_predicate.h @@ -52,7 +52,8 @@ public: Status evaluate(BitmapIndexIterator* iterator, uint32_t num_rows, roaring::Roaring* roaring) const override; - Status evaluate(const Schema& schema, InvertedIndexIterator* iterator, uint32_t num_rows, + Status evaluate(const vectorized::NameAndTypePair& name_with_type, + InvertedIndexIterator* iterator, uint32_t num_rows, roaring::Roaring* bitmap) const override; uint16_t evaluate(const vectorized::IColumn& column, uint16_t* sel, @@ -96,6 +97,11 @@ public: bool can_do_bloom_filter(bool ngram) const override { return _is_null && !ngram; } + bool can_do_apply_safely(PrimitiveType input_type, bool is_null) const override { + // Always safe to apply is null predicate + return true; + } + void evaluate_vec(const vectorized::IColumn& column, uint16_t size, bool* flags) const override; private: diff --git 
a/be/src/olap/push_handler.cpp b/be/src/olap/push_handler.cpp index de669de603..fb459163f6 100644 --- a/be/src/olap/push_handler.cpp +++ b/be/src/olap/push_handler.cpp @@ -167,6 +167,7 @@ Status PushHandler::_do_streaming_ingestion(TabletSharedPtr tablet, const TPushR tablet_schema->copy_from(*tablet->tablet_schema()); if (!request.columns_desc.empty() && request.columns_desc[0].col_unique_id >= 0) { tablet_schema->clear_columns(); + // TODO(lhy) handle variant for (const auto& column_desc : request.columns_desc) { tablet_schema->append_column(TabletColumn(column_desc)); } @@ -228,6 +229,7 @@ Status PushHandler::_convert_v2(TabletSharedPtr cur_tablet, RowsetSharedPtr* cur context.rowset_state = PREPARED; context.segments_overlap = OVERLAP_UNKNOWN; context.tablet_schema = tablet_schema; + context.original_tablet_schema = tablet_schema; context.newest_write_timestamp = UnixSeconds(); auto rowset_writer = DORIS_TRY(cur_tablet->create_rowset_writer(context, false)); _pending_rs_guard = diff --git a/be/src/olap/reader.cpp b/be/src/olap/reader.cpp index 75edcb0b97..68365b66fc 100644 --- a/be/src/olap/reader.cpp +++ b/be/src/olap/reader.cpp @@ -47,6 +47,7 @@ #include "olap/schema.h" #include "olap/tablet.h" #include "olap/tablet_meta.h" +#include "olap/tablet_schema.h" #include "runtime/query_context.h" #include "runtime/runtime_predicate.h" #include "runtime/runtime_state.h" @@ -260,6 +261,16 @@ Status TabletReader::_capture_rs_readers(const ReaderParams& read_params) { return Status::OK(); } +TabletColumn TabletReader::materialize_column(const TabletColumn& orig) { + if (!orig.is_variant_type()) { + return orig; + } + TabletColumn column_with_cast_type = orig; + auto cast_type = _reader_context.target_cast_type_for_variants.at(orig.name()); + column_with_cast_type.set_type(TabletColumn::get_field_type_by_type(cast_type)); + return column_with_cast_type; +} + Status TabletReader::_init_params(const ReaderParams& read_params) { read_params.check_validation(); @@ -269,6 
+280,7 @@ Status TabletReader::_init_params(const ReaderParams& read_params) { _tablet = read_params.tablet; _tablet_schema = read_params.tablet_schema; _reader_context.runtime_state = read_params.runtime_state; + _reader_context.target_cast_type_for_variants = read_params.target_cast_type_for_variants; RETURN_IF_ERROR(_init_conditions_param(read_params)); _init_conditions_param_except_leafnode_of_andnode(read_params); @@ -472,7 +484,7 @@ Status TabletReader::_init_conditions_param(const ReaderParams& read_params) { RETURN_IF_ERROR(_tablet_schema->have_column(tmp_cond.column_name)); // The "column" parameter might represent a column resulting from the decomposition of a variant column. // Instead of using a "unique_id" for identification, we are utilizing a "path" to denote this column. - const auto& column = _tablet_schema->column(tmp_cond.column_name); + const auto& column = materialize_column(_tablet_schema->column(tmp_cond.column_name)); uint32_t index = _tablet_schema->field_index(tmp_cond.column_name); ColumnPredicate* predicate = parse_to_predicate(column, index, tmp_cond, _predicate_arena.get()); @@ -518,9 +530,9 @@ Status TabletReader::_init_conditions_param(const ReaderParams& read_params) { for (const auto& filter : read_params.function_filters) { _col_predicates.emplace_back(_parse_to_predicate(filter)); auto* pred = _col_predicates.back(); - const auto& col = _tablet->tablet_schema()->column(pred->column_id()); + const auto& col = _tablet_schema->column(pred->column_id()); auto is_like = is_like_predicate(pred); - auto* tablet_index = _tablet->tablet_schema()->get_ngram_bf_index(col.unique_id()); + auto* tablet_index = _tablet_schema->get_ngram_bf_index(col.unique_id()); if (is_like && tablet_index && config::enable_query_like_bloom_filter) { std::unique_ptr ng_bf; @@ -545,7 +557,7 @@ void TabletReader::_init_conditions_param_except_leafnode_of_andnode( const ReaderParams& read_params) { for (const auto& condition : 
read_params.conditions_except_leafnode_of_andnode) { TCondition tmp_cond = condition; - const auto& column = _tablet_schema->column(tmp_cond.column_name); + const auto& column = materialize_column(_tablet_schema->column(tmp_cond.column_name)); uint32_t index = _tablet_schema->field_index(tmp_cond.column_name); ColumnPredicate* predicate = parse_to_predicate(column, index, tmp_cond, _predicate_arena.get()); @@ -570,7 +582,7 @@ ColumnPredicate* TabletReader::_parse_to_predicate( if (index < 0) { return nullptr; } - const TabletColumn& column = _tablet_schema->column(index); + const TabletColumn& column = materialize_column(_tablet_schema->column(index)); return create_column_predicate(index, bloom_filter.second, column.type(), _reader_context.runtime_state->be_exec_version(), &column); } @@ -581,7 +593,7 @@ ColumnPredicate* TabletReader::_parse_to_predicate( if (index < 0) { return nullptr; } - const TabletColumn& column = _tablet_schema->column(index); + const TabletColumn& column = materialize_column(_tablet_schema->column(index)); return create_column_predicate(index, in_filter.second, column.type(), _reader_context.runtime_state->be_exec_version(), &column); } @@ -592,7 +604,7 @@ ColumnPredicate* TabletReader::_parse_to_predicate( if (index < 0) { return nullptr; } - const TabletColumn& column = _tablet_schema->column(index); + const TabletColumn& column = materialize_column(_tablet_schema->column(index)); return create_column_predicate(index, bitmap_filter.second, column.type(), _reader_context.runtime_state->be_exec_version(), &column); } @@ -602,7 +614,7 @@ ColumnPredicate* TabletReader::_parse_to_predicate(const FunctionFilter& functio if (index < 0) { return nullptr; } - const TabletColumn& column = _tablet_schema->column(index); + const TabletColumn& column = materialize_column(_tablet_schema->column(index)); return create_column_predicate(index, std::make_shared(function_filter), column.type(), _reader_context.runtime_state->be_exec_version(), &column); 
diff --git a/be/src/olap/reader.h b/be/src/olap/reader.h index a6cbd58460..ed57b9e3d1 100644 --- a/be/src/olap/reader.h +++ b/be/src/olap/reader.h @@ -135,6 +135,8 @@ public: std::vector conditions_except_leafnode_of_andnode; std::vector function_filters; std::vector delete_predicates; + // slots that cast may be eliminated in storage layer + std::map target_cast_type_for_variants; std::vector rs_splits; // For unique key table with merge-on-write @@ -257,6 +259,14 @@ protected: Status _init_return_columns(const ReaderParams& read_params); const BaseTabletSPtr& tablet() { return _tablet; } + // If original column is a variant type column, and it's predicate is normalized + // so in order to get the real type of column predicate, we need to reset type + // according to the related type in `target_cast_type_for_variants`.Since variant is not + // an predicate applicable type.Otherwise return the original tablet column. + // Eg. `where cast(v:a as bigint) > 1` will elimate cast, and materialize this variant column + // to type bigint + TabletColumn materialize_column(const TabletColumn& orig); + const TabletSchema& tablet_schema() { return *_tablet_schema; } std::unique_ptr _predicate_arena; diff --git a/be/src/olap/rowset/beta_rowset.cpp b/be/src/olap/rowset/beta_rowset.cpp index 67fbb6c020..b65eac88d3 100644 --- a/be/src/olap/rowset/beta_rowset.cpp +++ b/be/src/olap/rowset/beta_rowset.cpp @@ -172,10 +172,10 @@ Status BetaRowset::remove() { success = false; } for (auto& column : _schema->columns()) { - const TabletIndex* index_meta = _schema->get_inverted_index(column.unique_id()); + const TabletIndex* index_meta = _schema->get_inverted_index(column); if (index_meta) { std::string inverted_index_file = InvertedIndexDescriptor::get_index_file_name( - seg_path, index_meta->index_id()); + seg_path, index_meta->index_id(), index_meta->get_index_suffix()); st = fs->delete_file(inverted_index_file); if (!st.ok()) { LOG(WARNING) << st.to_string(); @@ -252,10 +252,10 @@ 
Status BetaRowset::link_files_to(const std::string& dir, RowsetId new_rowset_id, if (without_index_uids != nullptr && without_index_uids->count(index_id)) { continue; } - std::string inverted_index_src_file_path = - InvertedIndexDescriptor::get_index_file_name(src_path, index_id); - std::string inverted_index_dst_file_path = - InvertedIndexDescriptor::get_index_file_name(dst_path, index_id); + std::string inverted_index_src_file_path = InvertedIndexDescriptor::get_index_file_name( + src_path, index_id, index.get_index_suffix()); + std::string inverted_index_dst_file_path = InvertedIndexDescriptor::get_index_file_name( + dst_path, index_id, index.get_index_suffix()); bool index_file_exists = true; RETURN_IF_ERROR(local_fs->exists(inverted_index_src_file_path, &index_file_exists)); if (index_file_exists) { @@ -300,14 +300,14 @@ Status BetaRowset::copy_files_to(const std::string& dir, const RowsetId& new_row RETURN_IF_ERROR(io::global_local_filesystem()->copy_dirs(src_path, dst_path)); for (auto& column : _schema->columns()) { // if (column.has_inverted_index()) { - const TabletIndex* index_meta = _schema->get_inverted_index(column.unique_id()); + const TabletIndex* index_meta = _schema->get_inverted_index(column); if (index_meta) { std::string inverted_index_src_file_path = - InvertedIndexDescriptor::get_index_file_name(src_path, - index_meta->index_id()); + InvertedIndexDescriptor::get_index_file_name( + src_path, index_meta->index_id(), index_meta->get_index_suffix()); std::string inverted_index_dst_file_path = - InvertedIndexDescriptor::get_index_file_name(dst_path, - index_meta->index_id()); + InvertedIndexDescriptor::get_index_file_name( + dst_path, index_meta->index_id(), index_meta->get_index_suffix()); RETURN_IF_ERROR(io::global_local_filesystem()->copy_dirs( inverted_index_src_file_path, inverted_index_dst_file_path)); LOG(INFO) << "success to copy file. 
from=" << inverted_index_src_file_path << ", " @@ -335,14 +335,16 @@ Status BetaRowset::upload_to(io::RemoteFileSystem* dest_fs, const RowsetId& new_ local_paths.push_back(local_seg_path); for (auto& column : _schema->columns()) { // if (column.has_inverted_index()) { - const TabletIndex* index_meta = _schema->get_inverted_index(column.unique_id()); + const TabletIndex* index_meta = _schema->get_inverted_index(column); if (index_meta) { std::string remote_inverted_index_file = - InvertedIndexDescriptor::get_index_file_name(remote_seg_path, - index_meta->index_id()); + InvertedIndexDescriptor::get_index_file_name( + remote_seg_path, index_meta->index_id(), + index_meta->get_index_suffix()); std::string local_inverted_index_file = - InvertedIndexDescriptor::get_index_file_name(local_seg_path, - index_meta->index_id()); + InvertedIndexDescriptor::get_index_file_name( + local_seg_path, index_meta->index_id(), + index_meta->get_index_suffix()); dest_paths.push_back(remote_inverted_index_file); local_paths.push_back(local_inverted_index_file); } diff --git a/be/src/olap/rowset/beta_rowset_reader.cpp b/be/src/olap/rowset/beta_rowset_reader.cpp index 8877eed2b8..18725b116d 100644 --- a/be/src/olap/rowset/beta_rowset_reader.cpp +++ b/be/src/olap/rowset/beta_rowset_reader.cpp @@ -138,7 +138,9 @@ Status BetaRowsetReader::get_segment_iterators(RowsetReaderContext* read_context // It is necessary to ensure that there is a schema version when using a cache // because the absence of a schema version can result in reading a stale version // of the schema after a schema change. 
+ // For table contains variants, it's schema is unstable and variable so we could not use schema cache here if (_read_context->tablet_schema->schema_version() < 0 || + _read_context->tablet_schema->num_variant_columns() > 0 || (_input_schema = SchemaCache::instance()->get_schema(schema_key)) == nullptr) { _input_schema = std::make_shared(_read_context->tablet_schema->columns(), read_columns); @@ -212,6 +214,7 @@ Status BetaRowsetReader::get_segment_iterators(RowsetReaderContext* read_context _read_options.io_ctx.reader_type = _read_context->reader_type; _read_options.io_ctx.file_cache_stats = &_stats->file_cache_stats; _read_options.io_ctx.is_disposable = _read_context->reader_type != ReaderType::READER_QUERY; + _read_options.target_cast_type_for_variants = _read_context->target_cast_type_for_variants; if (_read_context->runtime_state != nullptr) { _read_options.io_ctx.query_id = &_read_context->runtime_state->query_id(); _read_options.io_ctx.read_file_cache = diff --git a/be/src/olap/rowset/beta_rowset_writer.cpp b/be/src/olap/rowset/beta_rowset_writer.cpp index 62c32c9a57..bcac89ad9a 100644 --- a/be/src/olap/rowset/beta_rowset_writer.cpp +++ b/be/src/olap/rowset/beta_rowset_writer.cpp @@ -52,8 +52,7 @@ #include "util/slice.h" #include "util/time.h" #include "vec/columns/column.h" -#include "vec/columns/column_object.h" -#include "vec/common/schema_util.h" // variant column +#include "vec/common/schema_util.h" #include "vec/core/block.h" #include "vec/data_types/data_type_factory.hpp" @@ -321,17 +320,19 @@ Status BetaRowsetWriter::_rename_compacted_indices(int64_t begin, int64_t end, u int ret; // rename remaining inverted index files for (auto column : _context.tablet_schema->columns()) { - if (_context.tablet_schema->has_inverted_index(column.unique_id())) { - auto index_id = - _context.tablet_schema->get_inverted_index(column.unique_id())->index_id(); + if (_context.tablet_schema->has_inverted_index(column)) { + auto index_info = 
_context.tablet_schema->get_inverted_index(column); + auto index_id = index_info->index_id(); auto src_idx_path = begin < 0 ? InvertedIndexDescriptor::inverted_index_file_path( - _context.rowset_dir, _context.rowset_id, seg_id, index_id) + _context.rowset_dir, _context.rowset_id, seg_id, index_id, + index_info->get_index_suffix()) : InvertedIndexDescriptor::local_inverted_index_path_segcompacted( _context.rowset_dir, _context.rowset_id, begin, end, - index_id); + index_id, index_info->get_index_suffix()); auto dst_idx_path = InvertedIndexDescriptor::inverted_index_file_path( - _context.rowset_dir, _context.rowset_id, _num_segcompacted, index_id); + _context.rowset_dir, _context.rowset_id, _num_segcompacted, index_id, + index_info->get_index_suffix()); VLOG_DEBUG << "segcompaction skip this index. rename " << src_idx_path << " to " << dst_idx_path; ret = rename(src_idx_path.c_str(), dst_idx_path.c_str()); @@ -427,6 +428,13 @@ Status BetaRowsetWriter::add_rowset(RowsetSharedPtr rowset) { if (rowset->rowset_meta()->has_delete_predicate()) { _rowset_meta->set_delete_predicate(rowset->rowset_meta()->delete_predicate()); } + // Update the tablet schema in the rowset metadata if the tablet schema contains a variant. + // During the build process, _context.tablet_schema will be used as the rowset schema. + // This situation may arise in the event of a linked schema change. If this schema is not set, + // the subcolumns of the variant will be lost. 
+ if (_context.tablet_schema->num_variant_columns() > 0 && rowset->tablet_schema() != nullptr) { + _context.tablet_schema = rowset->tablet_schema(); + } return Status::OK(); } @@ -445,15 +453,9 @@ Status BetaRowsetWriter::flush_memtable(vectorized::Block* block, int32_t segmen return Status::OK(); } - TabletSchemaSPtr flush_schema; - if (_context.tablet_schema->num_variant_columns() > 0) { - // Unfold variant column - RETURN_IF_ERROR(expand_variant_to_subcolumns(*block, flush_schema)); - } { SCOPED_RAW_TIMER(&_segment_writer_ns); - RETURN_IF_ERROR( - _segment_creator.flush_single_block(block, segment_id, flush_size, flush_schema)); + RETURN_IF_ERROR(_segment_creator.flush_single_block(block, segment_id, flush_size)); } return Status::OK(); } @@ -559,9 +561,8 @@ bool BetaRowsetWriter::_is_segment_overlapping( // => update_schema: A(bigint), B(double), C(int), D(int) void BetaRowsetWriter::update_rowset_schema(TabletSchemaSPtr flush_schema) { std::lock_guard lock(*(_context.schema_lock)); - TabletSchemaSPtr update_schema = std::make_shared(); - vectorized::schema_util::get_least_common_schema({_context.tablet_schema, flush_schema}, - update_schema); + TabletSchemaSPtr update_schema = vectorized::schema_util::get_least_common_schema( + {_context.tablet_schema, flush_schema}, nullptr); CHECK_GE(update_schema->num_columns(), flush_schema->num_columns()) << "Rowset merge schema columns count is " << update_schema->num_columns() << ", but flush_schema is larger " << flush_schema->num_columns() @@ -778,113 +779,4 @@ Status BetaRowsetWriter::flush_segment_writer_for_segcompaction( return Status::OK(); } -Status BetaRowsetWriter::expand_variant_to_subcolumns(vectorized::Block& block, - TabletSchemaSPtr& flush_schema) { - size_t num_rows = block.rows(); - if (num_rows == 0) { - return Status::OK(); - } - - std::vector variant_column_pos; - if (is_partial_update()) { - // check columns that used to do partial updates should not include variant - for (int i : 
get_partial_update_info()->update_cids) { - if (_context.tablet_schema->columns()[i].is_variant_type()) { - return Status::InvalidArgument("Not implement partial updates for variant"); - } - } - } else { - for (int i = 0; i < _context.tablet_schema->columns().size(); ++i) { - if (_context.tablet_schema->columns()[i].is_variant_type()) { - variant_column_pos.push_back(i); - } - } - } - - if (variant_column_pos.empty()) { - return Status::OK(); - } - - try { - // Parse each variant column from raw string column - vectorized::schema_util::parse_variant_columns(block, variant_column_pos); - vectorized::schema_util::finalize_variant_columns(block, variant_column_pos, - false /*not ingore sparse*/); - vectorized::schema_util::encode_variant_sparse_subcolumns(block, variant_column_pos); - } catch (const doris::Exception& e) { - // TODO more graceful, max_filter_ratio - LOG(WARNING) << "encounter execption " << e.to_string(); - return Status::InternalError(e.to_string()); - } - - // Dynamic Block consists of two parts, dynamic part of columns and static part of columns - // static extracted - // | --------- | ----------- | - // The static ones are original _tablet_schame columns - flush_schema = std::make_shared(); - flush_schema->copy_from(*_context.tablet_schema); - vectorized::Block flush_block(std::move(block)); - - // If column already exist in original tablet schema, then we pick common type - // and cast column to common type, and modify tablet column to common type, - // otherwise it's a new column, we should add to frontend - auto append_column = [&](const TabletColumn& parent_variant, auto& column_entry_from_object) { - const std::string& column_name = - parent_variant.name_lower_case() + "." 
+ column_entry_from_object->path.get_path(); - const vectorized::DataTypePtr& final_data_type_from_object = - column_entry_from_object->data.get_least_common_type(); - TabletColumn tablet_column; - vectorized::PathInDataBuilder full_path_builder; - auto full_path = full_path_builder.append(parent_variant.name_lower_case(), false) - .append(column_entry_from_object->path.get_parts(), false) - .build(); - vectorized::schema_util::get_column_by_type( - final_data_type_from_object, column_name, tablet_column, - vectorized::schema_util::ExtraInfo {.unique_id = -1, - .parent_unique_id = parent_variant.unique_id(), - .path_info = full_path}); - flush_schema->append_column(std::move(tablet_column)); - flush_block.insert({column_entry_from_object->data.get_finalized_column_ptr()->get_ptr(), - final_data_type_from_object, column_name}); - }; - - // 1. Flatten variant column into flat columns, append flatten columns to the back of original Block and TabletSchema - // those columns are extracted columns, leave none extracted columns remain in original variant column, which is - // JSONB format at present. - // 2. 
Collect columns that need to be added or modified when data type changes or new columns encountered - for (size_t i = 0; i < variant_column_pos.size(); ++i) { - size_t variant_pos = variant_column_pos[i]; - vectorized::ColumnObject& object_column = assert_cast( - flush_block.get_by_position(variant_pos).column->assume_mutable_ref()); - const TabletColumn& parent_column = _context.tablet_schema->columns()[variant_pos]; - CHECK(object_column.is_finalized()); - std::shared_ptr root; - for (auto& entry : object_column.get_subcolumns()) { - if (entry->path.empty()) { - // root - root = entry; - continue; - } - append_column(parent_column, entry); - } - // Create new variant column and set root column - auto obj = vectorized::ColumnObject::create(true, false); - // '{}' indicates a root path - static_cast(obj.get())->add_sub_column( - {}, root->data.get_finalized_column_ptr()->assume_mutable(), - root->data.get_least_common_type()); - flush_block.get_by_position(variant_pos).column = obj->get_ptr(); - vectorized::PathInDataBuilder full_root_path_builder; - auto full_root_path = - full_root_path_builder.append(parent_column.name_lower_case(), false).build(); - flush_schema->mutable_columns()[variant_pos].set_path_info(full_root_path); - VLOG_DEBUG << "set root_path : " << full_root_path.get_path(); - } - update_rowset_schema(flush_schema); - block.swap(flush_block); - VLOG_DEBUG << "dump block: " << block.dump_data(); - VLOG_DEBUG << "dump flush schema: " << flush_schema->dump_structure(); - return Status::OK(); -} - } // namespace doris diff --git a/be/src/olap/rowset/beta_rowset_writer.h b/be/src/olap/rowset/beta_rowset_writer.h index 1821b67003..62e802658f 100644 --- a/be/src/olap/rowset/beta_rowset_writer.h +++ b/be/src/olap/rowset/beta_rowset_writer.h @@ -165,14 +165,7 @@ private: Status _rename_compacted_segment_plain(uint64_t seg_id); Status _rename_compacted_indices(int64_t begin, int64_t end, uint64_t seg_id); - // Unfold variant column to Block - // Eg. 
[A | B | C | (D, E, F)] - // After unfold block structure changed to -> [A | B | C | D | E | F] - // The expanded D, E, F is dynamic part of the block - // The flushed Block columns should match exactly from the same type of frontend meta - Status expand_variant_to_subcolumns(vectorized::Block& block, TabletSchemaSPtr& flush_schema); void update_rowset_schema(TabletSchemaSPtr flush_schema); - // build a tmp rowset for load segment to calc delete_bitmap // for this segment RowsetSharedPtr _build_tmp(); diff --git a/be/src/olap/rowset/beta_rowset_writer_v2.cpp b/be/src/olap/rowset/beta_rowset_writer_v2.cpp index d5f58d87b3..7d83742af1 100644 --- a/be/src/olap/rowset/beta_rowset_writer_v2.cpp +++ b/be/src/olap/rowset/beta_rowset_writer_v2.cpp @@ -96,11 +96,9 @@ Status BetaRowsetWriterV2::flush_memtable(vectorized::Block* block, int32_t segm return Status::OK(); } - TabletSchemaSPtr flush_schema; { SCOPED_RAW_TIMER(&_segment_writer_ns); - RETURN_IF_ERROR( - _segment_creator.flush_single_block(block, segment_id, flush_size, flush_schema)); + RETURN_IF_ERROR(_segment_creator.flush_single_block(block, segment_id, flush_size)); } // delete bitmap and seg compaction are done on the destination BE. 
return Status::OK(); diff --git a/be/src/olap/rowset/rowset_reader_context.h b/be/src/olap/rowset/rowset_reader_context.h index d1b6253b13..365f4a734f 100644 --- a/be/src/olap/rowset/rowset_reader_context.h +++ b/be/src/olap/rowset/rowset_reader_context.h @@ -80,6 +80,8 @@ struct RowsetReaderContext { bool is_key_column_group = false; const std::set* output_columns = nullptr; RowsetId rowset_id; + // slots that cast may be eliminated in storage layer + std::map target_cast_type_for_variants; }; } // namespace doris diff --git a/be/src/olap/rowset/rowset_writer_context.h b/be/src/olap/rowset/rowset_writer_context.h index 49a63b1e42..89147aca04 100644 --- a/be/src/olap/rowset/rowset_writer_context.h +++ b/be/src/olap/rowset/rowset_writer_context.h @@ -63,6 +63,7 @@ struct RowsetWriterContext { io::FileSystemSPtr fs; std::string rowset_dir; TabletSchemaSPtr tablet_schema; + TabletSchemaSPtr original_tablet_schema; // PREPARED/COMMITTED for pending rowset // VISIBLE for non-pending rowset RowsetStatePB rowset_state; diff --git a/be/src/olap/rowset/segcompaction.cpp b/be/src/olap/rowset/segcompaction.cpp index e3eeeb0201..c772ba711a 100644 --- a/be/src/olap/rowset/segcompaction.cpp +++ b/be/src/olap/rowset/segcompaction.cpp @@ -135,10 +135,11 @@ Status SegcompactionWorker::_delete_original_segments(uint32_t begin, uint32_t e strings::Substitute("Failed to delete file=$0", seg_path)); // Delete inverted index files for (auto column : schema->columns()) { - if (schema->has_inverted_index(column.unique_id())) { - auto index_id = schema->get_inverted_index(column.unique_id())->index_id(); + if (schema->has_inverted_index(column)) { + auto index_info = schema->get_inverted_index(column); + auto index_id = index_info->index_id(); auto idx_path = InvertedIndexDescriptor::inverted_index_file_path( - ctx.rowset_dir, ctx.rowset_id, i, index_id); + ctx.rowset_dir, ctx.rowset_id, i, index_id, index_info->get_index_suffix()); VLOG_DEBUG << "segcompaction index. 
delete file " << idx_path; RETURN_NOT_OK_STATUS_WITH_WARN( fs->delete_file(idx_path), diff --git a/be/src/olap/rowset/segment_creator.cpp b/be/src/olap/rowset/segment_creator.cpp index eb8173609c..7e904478b1 100644 --- a/be/src/olap/rowset/segment_creator.cpp +++ b/be/src/olap/rowset/segment_creator.cpp @@ -27,10 +27,19 @@ #include "common/compiler_util.h" // IWYU pragma: keep #include "common/config.h" #include "common/logging.h" +#include "common/status.h" #include "io/fs/file_writer.h" +#include "olap/olap_define.h" #include "olap/rowset/beta_rowset_writer.h" // SegmentStatistics #include "olap/rowset/segment_v2/segment_writer.h" #include "olap/rowset/segment_v2/vertical_segment_writer.h" +#include "olap/tablet_schema.h" +#include "olap/utils.h" +#include "vec/columns/column.h" +#include "vec/columns/column_nullable.h" +#include "vec/columns/column_object.h" +#include "vec/common/assert_cast.h" +#include "vec/common/schema_util.h" // variant column #include "vec/core/block.h" namespace doris { @@ -40,32 +49,168 @@ SegmentFlusher::SegmentFlusher() = default; SegmentFlusher::~SegmentFlusher() = default; -Status SegmentFlusher::init(const RowsetWriterContext& rowset_writer_context) { - _context = rowset_writer_context; +Status SegmentFlusher::init(RowsetWriterContext& rowset_writer_context) { + _context = &rowset_writer_context; return Status::OK(); } Status SegmentFlusher::flush_single_block(const vectorized::Block* block, int32_t segment_id, - int64_t* flush_size, TabletSchemaSPtr flush_schema) { + int64_t* flush_size) { if (block->rows() == 0) { return Status::OK(); } - bool no_compression = block->bytes() <= config::segment_compression_threshold_kb * 1024; + TabletSchemaSPtr flush_schema = nullptr; + // Expand variant columns + vectorized::Block flush_block(*block); + if (_context->write_type != DataWriteType::TYPE_COMPACTION && + _context->tablet_schema->num_variant_columns() > 0) { + RETURN_IF_ERROR(_expand_variant_to_subcolumns(flush_block, flush_schema)); + 
} + bool no_compression = flush_block.bytes() <= config::segment_compression_threshold_kb * 1024; if (config::enable_vertical_segment_writer && - _context.tablet_schema->cluster_key_idxes().empty()) { + _context->tablet_schema->cluster_key_idxes().empty()) { std::unique_ptr writer; RETURN_IF_ERROR(_create_segment_writer(writer, segment_id, no_compression, flush_schema)); - RETURN_IF_ERROR(_add_rows(writer, block, 0, block->rows())); + RETURN_IF_ERROR(_add_rows(writer, &flush_block, 0, flush_block.rows())); RETURN_IF_ERROR(_flush_segment_writer(writer, flush_size)); } else { std::unique_ptr writer; RETURN_IF_ERROR(_create_segment_writer(writer, segment_id, no_compression, flush_schema)); - RETURN_IF_ERROR(_add_rows(writer, block, 0, block->rows())); + RETURN_IF_ERROR(_add_rows(writer, &flush_block, 0, flush_block.rows())); RETURN_IF_ERROR(_flush_segment_writer(writer, flush_size)); } return Status::OK(); } +Status SegmentFlusher::_expand_variant_to_subcolumns(vectorized::Block& block, + TabletSchemaSPtr& flush_schema) { + size_t num_rows = block.rows(); + if (num_rows == 0) { + return Status::OK(); + } + + std::vector variant_column_pos; + if (_context->partial_update_info && _context->partial_update_info->is_partial_update) { + // check columns that used to do partial updates should not include variant + for (int i : _context->partial_update_info->update_cids) { + const auto& col = _context->tablet_schema->columns()[i]; + if (!col.is_key() && col.name() != DELETE_SIGN) { + return Status::InvalidArgument( + "Not implement partial update for variant only support delete currently"); + } + } + } else { + for (int i = 0; i < _context->tablet_schema->columns().size(); ++i) { + if (_context->tablet_schema->columns()[i].is_variant_type()) { + variant_column_pos.push_back(i); + } + } + } + + if (variant_column_pos.empty()) { + return Status::OK(); + } + + RETURN_IF_ERROR( + vectorized::schema_util::parse_and_encode_variant_columns(block, variant_column_pos)); + + // Dynamic 
Block consists of two parts, dynamic part of columns and static part of columns + // static extracted + // | --------- | ----------- | + // The static ones are original _tablet_schame columns + flush_schema = std::make_shared(); + flush_schema->copy_from(*_context->original_tablet_schema); + + vectorized::Block flush_block(std::move(block)); + // If column already exist in original tablet schema, then we pick common type + // and cast column to common type, and modify tablet column to common type, + // otherwise it's a new column + auto append_column = [&](const TabletColumn& parent_variant, auto& column_entry_from_object) { + const std::string& column_name = + parent_variant.name_lower_case() + "." + column_entry_from_object->path.get_path(); + const vectorized::DataTypePtr& final_data_type_from_object = + column_entry_from_object->data.get_least_common_type(); + vectorized::PathInDataBuilder full_path_builder; + auto full_path = full_path_builder.append(parent_variant.name_lower_case(), false) + .append(column_entry_from_object->path.get_parts(), false) + .build(); + TabletColumn tablet_column = vectorized::schema_util::get_column_by_type( + final_data_type_from_object, column_name, + vectorized::schema_util::ExtraInfo {.unique_id = parent_variant.unique_id(), + .parent_unique_id = parent_variant.unique_id(), + .path_info = full_path}); + flush_schema->append_column(std::move(tablet_column)); + + flush_block.insert({column_entry_from_object->data.get_finalized_column_ptr()->get_ptr(), + final_data_type_from_object, column_name}); + }; + + // 1. Flatten variant column into flat columns, append flatten columns to the back of original Block and TabletSchema + // those columns are extracted columns, leave none extracted columns remain in original variant column, which is + // JSONB format at present. + // 2. 
Collect columns that need to be added or modified when data type changes or new columns encountered + for (size_t i = 0; i < variant_column_pos.size(); ++i) { + size_t variant_pos = variant_column_pos[i]; + auto column_ref = flush_block.get_by_position(variant_pos).column; + bool is_nullable = column_ref->is_nullable(); + const vectorized::ColumnObject& object_column = assert_cast( + remove_nullable(column_ref)->assume_mutable_ref()); + const TabletColumn& parent_column = _context->tablet_schema->columns()[variant_pos]; + CHECK(object_column.is_finalized()); + std::shared_ptr root; + for (auto& entry : object_column.get_subcolumns()) { + if (entry->path.empty()) { + // root + root = entry; + continue; + } + append_column(parent_column, entry); + } + // Create new variant column and set root column + auto obj = vectorized::ColumnObject::create(true, false); + // '{}' indicates a root path + static_cast(obj.get())->add_sub_column( + {}, root->data.get_finalized_column_ptr()->assume_mutable(), + root->data.get_least_common_type()); + vectorized::ColumnPtr result = obj->get_ptr(); + if (is_nullable) { + const auto& null_map = assert_cast(*column_ref) + .get_null_map_column_ptr(); + result = vectorized::ColumnNullable::create(result, null_map); + } + flush_block.get_by_position(variant_pos).column = result; + vectorized::PathInDataBuilder full_root_path_builder; + auto full_root_path = + full_root_path_builder.append(parent_column.name_lower_case(), false).build(); + flush_schema->mutable_columns()[variant_pos].set_path_info(full_root_path); + VLOG_DEBUG << "set root_path : " << full_root_path.get_path(); + } + + vectorized::schema_util::inherit_tablet_index(flush_schema); + + { + // Update rowset schema, tablet's tablet schema will be updated when build Rowset + // Eg. 
flush schema: A(int), B(float), C(int), D(int) + // ctx.tablet_schema: A(bigint), B(double) + // => update_schema: A(bigint), B(double), C(int), D(int) + std::lock_guard lock(*(_context->schema_lock)); + TabletSchemaSPtr update_schema = vectorized::schema_util::get_least_common_schema( + {_context->tablet_schema, flush_schema}, nullptr); + CHECK_GE(update_schema->num_columns(), flush_schema->num_columns()) + << "Rowset merge schema columns count is " << update_schema->num_columns() + << ", but flush_schema is larger " << flush_schema->num_columns() + << " update_schema: " << update_schema->dump_structure() + << " flush_schema: " << flush_schema->dump_structure(); + _context->tablet_schema.swap(update_schema); + VLOG_DEBUG << "dump rs schema: " << _context->tablet_schema->dump_structure(); + } + + block.swap(flush_block); + VLOG_DEBUG << "dump block: " << block.dump_data(); + VLOG_DEBUG << "dump flush schema: " << flush_schema->dump_structure(); + return Status::OK(); +} + Status SegmentFlusher::close() { std::lock_guard l(_lock); for (auto& file_writer : _file_writers) { @@ -79,6 +224,12 @@ Status SegmentFlusher::close() { return Status::OK(); } +bool SegmentFlusher::need_buffering() { + // buffering variants for schema change + return _context->write_type == DataWriteType::TYPE_SCHEMA_CHANGE && + _context->tablet_schema->num_variant_columns() > 0; +} + Status SegmentFlusher::_add_rows(std::unique_ptr& segment_writer, const vectorized::Block* block, size_t row_offset, size_t row_num) { @@ -100,20 +251,20 @@ Status SegmentFlusher::_create_segment_writer(std::unique_ptrcreate(segment_id, file_writer)); + RETURN_IF_ERROR(_context->file_writer_creator->create(segment_id, file_writer)); segment_v2::SegmentWriterOptions writer_options; - writer_options.enable_unique_key_merge_on_write = _context.enable_unique_key_merge_on_write; - writer_options.rowset_ctx = &_context; - writer_options.write_type = _context.write_type; + writer_options.enable_unique_key_merge_on_write = 
_context->enable_unique_key_merge_on_write; + writer_options.rowset_ctx = _context; + writer_options.write_type = _context->write_type; if (no_compression) { writer_options.compression_type = NO_COMPRESSION; } - const auto& tablet_schema = flush_schema ? flush_schema : _context.tablet_schema; + const auto& tablet_schema = flush_schema ? flush_schema : _context->tablet_schema; writer.reset(new segment_v2::SegmentWriter( - file_writer.get(), segment_id, tablet_schema, _context.tablet, _context.data_dir, - _context.max_rows_per_segment, writer_options, _context.mow_context)); + file_writer.get(), segment_id, tablet_schema, _context->tablet, _context->data_dir, + _context->max_rows_per_segment, writer_options, _context->mow_context)); { std::lock_guard l(_lock); _file_writers.push_back(std::move(file_writer)); @@ -131,20 +282,20 @@ Status SegmentFlusher::_create_segment_writer( std::unique_ptr& writer, int32_t segment_id, bool no_compression, TabletSchemaSPtr flush_schema) { io::FileWriterPtr file_writer; - RETURN_IF_ERROR(_context.file_writer_creator->create(segment_id, file_writer)); + RETURN_IF_ERROR(_context->file_writer_creator->create(segment_id, file_writer)); segment_v2::VerticalSegmentWriterOptions writer_options; - writer_options.enable_unique_key_merge_on_write = _context.enable_unique_key_merge_on_write; - writer_options.rowset_ctx = &_context; - writer_options.write_type = _context.write_type; + writer_options.enable_unique_key_merge_on_write = _context->enable_unique_key_merge_on_write; + writer_options.rowset_ctx = _context; + writer_options.write_type = _context->write_type; if (no_compression) { writer_options.compression_type = NO_COMPRESSION; } - const auto& tablet_schema = flush_schema ? flush_schema : _context.tablet_schema; + const auto& tablet_schema = flush_schema ? 
flush_schema : _context->tablet_schema; writer.reset(new segment_v2::VerticalSegmentWriter( - file_writer.get(), segment_id, tablet_schema, _context.tablet, _context.data_dir, - _context.max_rows_per_segment, writer_options, _context.mow_context)); + file_writer.get(), segment_id, tablet_schema, _context->tablet, _context->data_dir, + _context->max_rows_per_segment, writer_options, _context->mow_context)); { std::lock_guard l(_lock); _file_writers.push_back(std::move(file_writer)); @@ -172,9 +323,9 @@ Status SegmentFlusher::_flush_segment_writer( if (!s.ok()) { return Status::Error(s.code(), "failed to finalize segment: {}", s.to_string()); } - VLOG_DEBUG << "tablet_id:" << _context.tablet_id + VLOG_DEBUG << "tablet_id:" << _context->tablet_id << " flushing filename: " << writer->data_dir_path() - << " rowset_id:" << _context.rowset_id; + << " rowset_id:" << _context->rowset_id; KeyBoundsPB key_bounds; Slice min_key = writer->min_encoded_key(); @@ -192,7 +343,7 @@ Status SegmentFlusher::_flush_segment_writer( writer.reset(); - RETURN_IF_ERROR(_context.segment_collector->add(segment_id, segstat)); + RETURN_IF_ERROR(_context->segment_collector->add(segment_id, segstat)); if (flush_size) { *flush_size = segment_size + index_size; @@ -214,9 +365,9 @@ Status SegmentFlusher::_flush_segment_writer(std::unique_ptrtablet_id + << " flushing rowset_dir: " << _context->rowset_dir + << " rowset_id:" << _context->rowset_id; KeyBoundsPB key_bounds; Slice min_key = writer->min_encoded_key(); @@ -234,7 +385,7 @@ Status SegmentFlusher::_flush_segment_writer(std::unique_ptradd(segment_id, segstat)); + RETURN_IF_ERROR(_context->segment_collector->add(segment_id, segstat)); if (flush_size) { *flush_size = segment_size + index_size; @@ -265,7 +416,7 @@ int64_t SegmentFlusher::Writer::max_row_to_add(size_t row_avg_size_in_bytes) { return _writer->max_row_to_add(row_avg_size_in_bytes); } -Status SegmentCreator::init(const RowsetWriterContext& rowset_writer_context) { +Status 
SegmentCreator::init(RowsetWriterContext& rowset_writer_context) { RETURN_IF_ERROR(_segment_flusher.init(rowset_writer_context)); return Status::OK(); } @@ -280,6 +431,16 @@ Status SegmentCreator::add_block(const vectorized::Block* block) { size_t row_avg_size_in_bytes = std::max((size_t)1, block_size_in_bytes / block_row_num); size_t row_offset = 0; + if (_segment_flusher.need_buffering()) { + if (_buffer_block.allocated_bytes() > config::write_buffer_size) { + vectorized::Block block = _buffer_block.to_block(); + RETURN_IF_ERROR(flush_single_block(&block)); + } else { + RETURN_IF_ERROR(_buffer_block.merge(*block)); + } + return Status::OK(); + } + if (_flush_writer == nullptr) { RETURN_IF_ERROR(_segment_flusher.create_writer(_flush_writer, allocate_segment_id())); } @@ -302,6 +463,10 @@ Status SegmentCreator::add_block(const vectorized::Block* block) { } Status SegmentCreator::flush() { + if (_buffer_block.rows() > 0) { + vectorized::Block block = _buffer_block.to_block(); + RETURN_IF_ERROR(flush_single_block(&block)); + } if (_flush_writer == nullptr) { return Status::OK(); } @@ -311,12 +476,11 @@ Status SegmentCreator::flush() { } Status SegmentCreator::flush_single_block(const vectorized::Block* block, int32_t segment_id, - int64_t* flush_size, TabletSchemaSPtr flush_schema) { + int64_t* flush_size) { if (block->rows() == 0) { return Status::OK(); } - RETURN_IF_ERROR( - _segment_flusher.flush_single_block(block, segment_id, flush_size, flush_schema)); + RETURN_IF_ERROR(_segment_flusher.flush_single_block(block, segment_id, flush_size)); return Status::OK(); } diff --git a/be/src/olap/rowset/segment_creator.h b/be/src/olap/rowset/segment_creator.h index 054a416e07..0a53117b4d 100644 --- a/be/src/olap/rowset/segment_creator.h +++ b/be/src/olap/rowset/segment_creator.h @@ -27,6 +27,7 @@ #include "olap/olap_common.h" #include "olap/rowset/rowset_writer_context.h" #include "util/spinlock.h" +#include "vec/core/block.h" namespace doris { namespace vectorized { @@ 
-87,13 +88,12 @@ public: ~SegmentFlusher(); - Status init(const RowsetWriterContext& rowset_writer_context); + Status init(RowsetWriterContext& rowset_writer_context); // Return the file size flushed to disk in "flush_size" // This method is thread-safe. Status flush_single_block(const vectorized::Block* block, int32_t segment_id, - int64_t* flush_size = nullptr, - TabletSchemaSPtr flush_schema = nullptr); + int64_t* flush_size = nullptr); int64_t num_rows_written() const { return _num_rows_written; } @@ -125,7 +125,10 @@ public: Status create_writer(std::unique_ptr& writer, uint32_t segment_id); + bool need_buffering(); + private: + Status _expand_variant_to_subcolumns(vectorized::Block& block, TabletSchemaSPtr& flush_schema); Status _add_rows(std::unique_ptr& segment_writer, const vectorized::Block* block, size_t row_offset, size_t row_num); Status _add_rows(std::unique_ptr& segment_writer, @@ -142,7 +145,7 @@ private: int64_t* flush_size = nullptr); private: - RowsetWriterContext _context; + RowsetWriterContext* _context; mutable SpinLock _lock; // protect following vectors. std::vector _file_writers; @@ -158,7 +161,7 @@ public: ~SegmentCreator() = default; - Status init(const RowsetWriterContext& rowset_writer_context); + Status init(RowsetWriterContext& rowset_writer_context); void set_segment_start_id(uint32_t start_id) { _next_segment_id = start_id; } @@ -178,8 +181,7 @@ public: // Return the file size flushed to disk in "flush_size" // This method is thread-safe. Status flush_single_block(const vectorized::Block* block, int32_t segment_id, - int64_t* flush_size = nullptr, - TabletSchemaSPtr flush_schema = nullptr); + int64_t* flush_size = nullptr); // Flush a block into a single segment, without pre-allocated segment_id. // This method is thread-safe. 
@@ -193,6 +195,9 @@ private: std::atomic _next_segment_id = 0; SegmentFlusher _segment_flusher; std::unique_ptr _flush_writer; + + // Buffer block to num bytes before flushing + vectorized::MutableBlock _buffer_block; }; } // namespace doris diff --git a/be/src/olap/rowset/segment_v2/column_reader.cpp b/be/src/olap/rowset/segment_v2/column_reader.cpp index 1432d51eb1..ee5d04864d 100644 --- a/be/src/olap/rowset/segment_v2/column_reader.cpp +++ b/be/src/olap/rowset/segment_v2/column_reader.cpp @@ -47,6 +47,7 @@ #include "olap/rowset/segment_v2/page_io.h" #include "olap/rowset/segment_v2/page_pointer.h" // for PagePointer #include "olap/rowset/segment_v2/row_ranges.h" +#include "olap/rowset/segment_v2/segment.h" #include "olap/rowset/segment_v2/zone_map_index.h" #include "olap/tablet_schema.h" #include "olap/types.h" // for TypeInfo @@ -61,10 +62,12 @@ #include "vec/columns/column_array.h" #include "vec/columns/column_map.h" #include "vec/columns/column_nullable.h" +#include "vec/columns/column_object.h" #include "vec/columns/column_struct.h" #include "vec/columns/column_vector.h" #include "vec/columns/columns_number.h" #include "vec/common/assert_cast.h" +#include "vec/common/schema_util.h" #include "vec/common/string_ref.h" #include "vec/core/types.h" #include "vec/runtime/vdatetime_value.h" //for VecDateTime @@ -171,6 +174,14 @@ Status ColumnReader::create(const ColumnReaderOptions& opts, const ColumnMetaPB& *reader = std::move(map_reader); return Status::OK(); } + case FieldType::OLAP_FIELD_TYPE_VARIANT: { + // Read variant only root data using a single ColumnReader + std::unique_ptr reader_local( + new ColumnReader(opts, meta, num_rows, file_reader)); + RETURN_IF_ERROR(reader_local->init(&meta)); + *reader = std::move(reader_local); + return Status::OK(); + } default: return Status::NotSupported("unsupported type for ColumnReader: {}", std::to_string(int(type))); @@ -668,6 +679,10 @@ Status ColumnReader::new_iterator(ColumnIterator** iterator) { val_iterator); 
return Status::OK(); } + case FieldType::OLAP_FIELD_TYPE_VARIANT: { + *iterator = new VariantRootColumnIterator(new FileColumnIterator(this)); + return Status::OK(); + } default: return Status::NotSupported("unsupported type to create iterator: {}", std::to_string(int(type))); @@ -1435,6 +1450,10 @@ void DefaultValueColumnIterator::insert_default_data(const TypeInfo* type_info, } break; } + case FieldType::OLAP_FIELD_TYPE_VARIANT: { + dst->insert_many_defaults(n); + break; + } default: { char* data_ptr = (char*)mem_value; size_t data_len = type_size; @@ -1464,5 +1483,73 @@ void DefaultValueColumnIterator::_insert_many_default(vectorized::MutableColumnP } } +Status VariantRootColumnIterator::next_batch(size_t* n, vectorized::MutableColumnPtr& dst, + bool* has_null) { + size_t size = dst->size(); + auto& obj = + dst->is_nullable() + ? assert_cast( + assert_cast(*dst).get_nested_column()) + : assert_cast(*dst); + if (obj.is_null_root()) { + obj.create_root(); + } + auto root_column = obj.get_root(); + RETURN_IF_ERROR(_inner_iter->next_batch(n, root_column, has_null)); + obj.incr_num_rows(*n); + for (auto& entry : obj.get_subcolumns()) { + if (entry->data.size() != size + *n) { + entry->data.insertManyDefaults(*n); + } + } + // fill nullmap + if (root_column->is_nullable()) { + DCHECK(dst->is_nullable()); + vectorized::ColumnUInt8& dst_null_map = + assert_cast(*dst).get_null_map_column(); + vectorized::ColumnUInt8& src_null_map = + assert_cast(*root_column).get_null_map_column(); + dst_null_map.insert_range_from(src_null_map, 0, src_null_map.size()); + } +#ifndef NDEBUG + obj.check_consistency(); +#endif + return Status::OK(); +} + +Status VariantRootColumnIterator::read_by_rowids(const rowid_t* rowids, const size_t count, + vectorized::MutableColumnPtr& dst) { + size_t size = dst->size(); + auto& obj = + dst->is_nullable() + ? 
assert_cast( + assert_cast(*dst).get_nested_column()) + : assert_cast(*dst); + if (obj.is_null_root()) { + obj.create_root(); + } + auto root_column = obj.get_root(); + RETURN_IF_ERROR(_inner_iter->read_by_rowids(rowids, count, root_column)); + obj.incr_num_rows(count); + for (auto& entry : obj.get_subcolumns()) { + if (entry->data.size() != size + count) { + entry->data.insertManyDefaults(count); + } + } + // fill nullmap + if (root_column->is_nullable()) { + DCHECK(dst->is_nullable()); + vectorized::ColumnUInt8& dst_null_map = + assert_cast(*dst).get_null_map_column(); + vectorized::ColumnUInt8& src_null_map = + assert_cast(*root_column).get_null_map_column(); + dst_null_map.insert_range_from(src_null_map, 0, src_null_map.size()); + } +#ifndef NDEBUG + obj.check_consistency(); +#endif + return Status::OK(); +} + } // namespace segment_v2 } // namespace doris diff --git a/be/src/olap/rowset/segment_v2/column_reader.h b/be/src/olap/rowset/segment_v2/column_reader.h index fbd72dbd33..99ff231080 100644 --- a/be/src/olap/rowset/segment_v2/column_reader.h +++ b/be/src/olap/rowset/segment_v2/column_reader.h @@ -44,7 +44,9 @@ #include "util/once.h" #include "vec/columns/column.h" #include "vec/columns/column_array.h" // ColumnArray +#include "vec/columns/subcolumn_tree.h" #include "vec/data_types/data_type.h" +#include "vec/json/path_in_data.h" namespace doris { @@ -595,6 +597,38 @@ private: int32_t _segment_id = 0; }; +class VariantRootColumnIterator : public ColumnIterator { +public: + VariantRootColumnIterator() = delete; + + explicit VariantRootColumnIterator(FileColumnIterator* iter) { _inner_iter.reset(iter); } + + ~VariantRootColumnIterator() override = default; + + Status init(const ColumnIteratorOptions& opts) override { return _inner_iter->init(opts); } + + Status seek_to_first() override { return _inner_iter->seek_to_first(); } + + Status seek_to_ordinal(ordinal_t ord_idx) override { + return _inner_iter->seek_to_ordinal(ord_idx); + } + + Status 
next_batch(size_t* n, vectorized::MutableColumnPtr& dst) { + bool has_null; + return next_batch(n, dst, &has_null); + } + + Status next_batch(size_t* n, vectorized::MutableColumnPtr& dst, bool* has_null) override; + + Status read_by_rowids(const rowid_t* rowids, const size_t count, + vectorized::MutableColumnPtr& dst) override; + + ordinal_t get_current_ordinal() const override { return _inner_iter->get_current_ordinal(); } + +private: + std::unique_ptr _inner_iter; +}; + // This iterator is used to read default value column class DefaultValueColumnIterator : public ColumnIterator { public: diff --git a/be/src/olap/rowset/segment_v2/hierarchical_data_reader.cpp b/be/src/olap/rowset/segment_v2/hierarchical_data_reader.cpp new file mode 100644 index 0000000000..69e18cb14c --- /dev/null +++ b/be/src/olap/rowset/segment_v2/hierarchical_data_reader.cpp @@ -0,0 +1,232 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#include "olap/rowset/segment_v2/hierarchical_data_reader.h" + +#include "common/status.h" +#include "io/io_common.h" +#include "olap/rowset/segment_v2/column_reader.h" +#include "vec/columns/column.h" +#include "vec/columns/column_object.h" +#include "vec/common/assert_cast.h" +#include "vec/common/schema_util.h" +#include "vec/data_types/data_type.h" +#include "vec/data_types/data_type_nullable.h" +#include "vec/json/path_in_data.h" + +namespace doris { +namespace segment_v2 { + +Status HierarchicalDataReader::create(std::unique_ptr* reader, + const SubcolumnColumnReaders::Node* node, + const SubcolumnColumnReaders::Node* root, + bool output_as_raw_json) { + // None leave node need merge with root + auto* stream_iter = new HierarchicalDataReader(node->path, output_as_raw_json); + std::vector leaves; + vectorized::PathsInData leaves_paths; + SubcolumnColumnReaders::get_leaves_of_node(node, leaves, leaves_paths); + for (size_t i = 0; i < leaves_paths.size(); ++i) { + if (leaves_paths[i] == root->path) { + // use set_root to share instead + continue; + } + RETURN_IF_ERROR(stream_iter->add_stream(leaves[i])); + } + // Make sure the root node is in strem_cache, so that child can merge data with root + // Eg. 
{"a" : "b" : {"c" : 1}}, access the `a.b` path and merge with root path so that + // we could make sure the data could be fully merged, since some column may not be extracted but remains in root + // like {"a" : "b" : {"e" : 1.1}} in jsonb format + ColumnIterator* it; + RETURN_IF_ERROR(root->data.reader->new_iterator(&it)); + stream_iter->set_root(std::make_unique( + root->data.file_column_type->create_column(), std::unique_ptr(it), + root->data.file_column_type)); + reader->reset(stream_iter); + return Status::OK(); +} + +Status HierarchicalDataReader::init(const ColumnIteratorOptions& opts) { + RETURN_IF_ERROR(tranverse([&](SubstreamReaderTree::Node& node) { + RETURN_IF_ERROR(node.data.iterator->init(opts)); + node.data.inited = true; + return Status::OK(); + })); + if (_root_reader && !_root_reader->inited) { + RETURN_IF_ERROR(_root_reader->iterator->init(opts)); + _root_reader->inited = true; + } + return Status::OK(); +} + +Status HierarchicalDataReader::seek_to_first() { + LOG(FATAL) << "Not implemented"; +} + +Status HierarchicalDataReader::seek_to_ordinal(ordinal_t ord) { + RETURN_IF_ERROR(tranverse([&](SubstreamReaderTree::Node& node) { + RETURN_IF_ERROR(node.data.iterator->seek_to_ordinal(ord)); + return Status::OK(); + })); + if (_root_reader) { + DCHECK(_root_reader->inited); + RETURN_IF_ERROR(_root_reader->iterator->seek_to_ordinal(ord)); + } + return Status::OK(); +} + +Status HierarchicalDataReader::next_batch(size_t* n, vectorized::MutableColumnPtr& dst, + bool* has_null) { + return process_read( + [&](StreamReader& reader, const vectorized::PathInData& path, + const vectorized::DataTypePtr& type) { + CHECK(reader.inited); + RETURN_IF_ERROR(reader.iterator->next_batch(n, reader.column, has_null)); + VLOG_DEBUG << fmt::format("{} next_batch {} rows, type={}", path.get_path(), *n, + type->get_name()); + reader.rows_read += *n; + return Status::OK(); + }, + dst, *n); +} + +Status HierarchicalDataReader::read_by_rowids(const rowid_t* rowids, const 
size_t count, + vectorized::MutableColumnPtr& dst) { + return process_read( + [&](StreamReader& reader, const vectorized::PathInData& path, + const vectorized::DataTypePtr& type) { + CHECK(reader.inited); + RETURN_IF_ERROR(reader.iterator->read_by_rowids(rowids, count, reader.column)); + VLOG_DEBUG << fmt::format("{} read_by_rowids {} rows, type={}", path.get_path(), + count, type->get_name()); + reader.rows_read += count; + return Status::OK(); + }, + dst, count); +} + +Status HierarchicalDataReader::add_stream(const SubcolumnColumnReaders::Node* node) { + if (_substream_reader.find_leaf(node->path)) { + VLOG_DEBUG << "Already exist sub column " << node->path.get_path(); + return Status::OK(); + } + CHECK(node); + ColumnIterator* it; + RETURN_IF_ERROR(node->data.reader->new_iterator(&it)); + std::unique_ptr it_ptr; + it_ptr.reset(it); + StreamReader reader(node->data.file_column_type->create_column(), std::move(it_ptr), + node->data.file_column_type); + bool added = _substream_reader.add(node->path, std::move(reader)); + if (!added) { + return Status::InternalError("Failed to add node path {}", node->path.get_path()); + } + VLOG_DEBUG << fmt::format("Add substream {} for {}", node->path.get_path(), _path.get_path()); + return Status::OK(); +} + +ordinal_t HierarchicalDataReader::get_current_ordinal() const { + return (*_substream_reader.begin())->data.iterator->get_current_ordinal(); +} + +Status ExtractReader::init(const ColumnIteratorOptions& opts) { + if (!_root_reader->inited) { + RETURN_IF_ERROR(_root_reader->iterator->init(opts)); + _root_reader->inited = true; + } + return Status::OK(); +} + +Status ExtractReader::seek_to_first() { + LOG(FATAL) << "Not implemented"; +} + +Status ExtractReader::seek_to_ordinal(ordinal_t ord) { + CHECK(_root_reader->inited); + return _root_reader->iterator->seek_to_ordinal(ord); +} + +Status ExtractReader::extract_to(vectorized::MutableColumnPtr& dst, size_t nrows) { + DCHECK(_root_reader); + DCHECK(_root_reader->inited); + 
vectorized::ColumnNullable* nullable_column = nullptr; + if (dst->is_nullable()) { + nullable_column = assert_cast(dst.get()); + } + auto& variant = + nullable_column == nullptr + ? assert_cast(*dst) + : assert_cast(nullable_column->get_nested_column()); + const auto& root = + _root_reader->column->is_nullable() + ? assert_cast( + assert_cast(*_root_reader->column) + .get_nested_column()) + : assert_cast(*_root_reader->column); + // extract root value with path, we can't modify the original root column + // since some other column may depend on it. + vectorized::MutableColumnPtr extracted_column; + RETURN_IF_ERROR(root.extract_root( // trim the root name, eg. v.a.b -> a.b + _col.path_info().pop_front(), extracted_column)); + if (variant.empty() || variant.is_null_root()) { + variant.create_root(root.get_root_type(), std::move(extracted_column)); + } else { + vectorized::ColumnPtr cast_column; + const auto& expected_type = variant.get_root_type(); + RETURN_IF_ERROR(vectorized::schema_util::cast_column( + {extracted_column->get_ptr(), + vectorized::make_nullable( + std::make_shared()), + ""}, + expected_type, &cast_column)); + variant.get_root()->insert_range_from(*cast_column, 0, nrows); + variant.set_num_rows(variant.get_root()->size()); + } + if (dst->is_nullable()) { + // fill nullmap + vectorized::ColumnUInt8& dst_null_map = + assert_cast(*dst).get_null_map_column(); + vectorized::ColumnUInt8& src_null_map = + assert_cast(*variant.get_root()).get_null_map_column(); + dst_null_map.insert_range_from(src_null_map, 0, src_null_map.size()); + } + _root_reader->column->clear(); +#ifndef NDEBUG + variant.check_consistency(); +#endif + return Status::OK(); +} + +Status ExtractReader::next_batch(size_t* n, vectorized::MutableColumnPtr& dst, bool* has_null) { + RETURN_IF_ERROR(_root_reader->iterator->next_batch(n, _root_reader->column)); + RETURN_IF_ERROR(extract_to(dst, *n)); + return Status::OK(); +} + +Status ExtractReader::read_by_rowids(const rowid_t* rowids, const 
size_t count, + vectorized::MutableColumnPtr& dst) { + RETURN_IF_ERROR(_root_reader->iterator->read_by_rowids(rowids, count, _root_reader->column)); + RETURN_IF_ERROR(extract_to(dst, count)); + return Status::OK(); +} + +ordinal_t ExtractReader::get_current_ordinal() const { + return _root_reader->iterator->get_current_ordinal(); +} + +} // namespace segment_v2 +} // namespace doris diff --git a/be/src/olap/rowset/segment_v2/hierarchical_data_reader.h b/be/src/olap/rowset/segment_v2/hierarchical_data_reader.h new file mode 100644 index 0000000000..9b8b25b26b --- /dev/null +++ b/be/src/olap/rowset/segment_v2/hierarchical_data_reader.h @@ -0,0 +1,237 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#pragma once + +#include +#include + +#include "io/io_common.h" +#include "olap/field.h" +#include "olap/iterators.h" +#include "olap/rowset/segment_v2/column_reader.h" +#include "olap/schema.h" +#include "olap/tablet_schema.h" +#include "vec/columns/column.h" +#include "vec/columns/column_nullable.h" +#include "vec/columns/column_object.h" +#include "vec/columns/subcolumn_tree.h" +#include "vec/common/assert_cast.h" +#include "vec/data_types/data_type_object.h" +#include "vec/data_types/data_type_string.h" +#include "vec/json/path_in_data.h" + +namespace doris { +namespace segment_v2 { + +struct StreamReader { + vectorized::MutableColumnPtr column; + std::unique_ptr iterator; + std::shared_ptr type; + bool inited = false; + size_t rows_read = 0; + StreamReader() = default; + StreamReader(vectorized::MutableColumnPtr&& col, std::unique_ptr&& it, + std::shared_ptr t) + : column(std::move(col)), iterator(std::move(it)), type(t) {} +}; + +// path -> StreamReader +using SubstreamReaderTree = vectorized::SubcolumnsTree; + +// path -> SubcolumnReader +struct SubcolumnReader { + std::unique_ptr reader; + std::shared_ptr file_column_type; +}; +using SubcolumnColumnReaders = vectorized::SubcolumnsTree; + +// Reader for hierarchical data for variant, merge with root(sparse encoded columns) +class HierarchicalDataReader : public ColumnIterator { +public: + HierarchicalDataReader(const vectorized::PathInData& path, bool output_as_raw_json = false) + : _path(path), _output_as_raw_json(output_as_raw_json) {} + + static Status create(std::unique_ptr* reader, + const SubcolumnColumnReaders::Node* target_node, + const SubcolumnColumnReaders::Node* root, bool output_as_raw_json = false); + + Status init(const ColumnIteratorOptions& opts) override; + + Status seek_to_first() override; + + Status seek_to_ordinal(ordinal_t ord) override; + + Status next_batch(size_t* n, vectorized::MutableColumnPtr& dst, bool* has_null) override; + + Status read_by_rowids(const rowid_t* rowids, 
const size_t count, + vectorized::MutableColumnPtr& dst) override; + + ordinal_t get_current_ordinal() const override; + + Status add_stream(const SubcolumnColumnReaders::Node* node); + + void set_root(std::unique_ptr&& root) { _root_reader = std::move(root); } + +private: + SubstreamReaderTree _substream_reader; + std::unique_ptr _root_reader; + size_t _rows_read = 0; + vectorized::PathInData _path; + bool _output_as_raw_json = false; + + template + Status tranverse(NodeFunction&& node_func) { + for (auto& entry : _substream_reader) { + RETURN_IF_ERROR(node_func(*entry)); + } + return Status::OK(); + } + // process read + template + Status process_read(ReadFunction&& read_func, vectorized::MutableColumnPtr& dst, size_t nrows) { + // // Read all sub columns, and merge with root column + vectorized::ColumnNullable* nullable_column = nullptr; + if (dst->is_nullable()) { + nullable_column = assert_cast(dst.get()); + } + auto& variant = nullable_column == nullptr ? assert_cast(*dst) + : assert_cast( + nullable_column->get_nested_column()); + + // read data + // read root first if it is not read before + RETURN_IF_ERROR(read_func(*_root_reader, {}, _root_reader->type)); + + // read container columns + RETURN_IF_ERROR(tranverse([&](SubstreamReaderTree::Node& node) { + RETURN_IF_ERROR(read_func(node.data, node.path, node.data.type)); + return Status::OK(); + })); + + // build variant as container + auto container = vectorized::ColumnObject::create(true, false); + auto& container_variant = assert_cast(*container); + + // add root first + if (_path.get_parts().size() == 1) { + auto& root_var = + _root_reader->column->is_nullable() + ? 
assert_cast( + assert_cast( + *_root_reader->column) + .get_nested_column()) + : assert_cast(*_root_reader->column); + auto column = root_var.get_root(); + auto type = root_var.get_root_type(); + container_variant.add_sub_column({}, std::move(column), type); + } + + RETURN_IF_ERROR(tranverse([&](SubstreamReaderTree::Node& node) { + vectorized::MutableColumnPtr column = node.data.column->get_ptr(); + bool add = container_variant.add_sub_column(node.path.pop_front(), std::move(column), + node.data.type); + if (!add) { + return Status::InternalError("Duplicated {}, type {}", node.path.get_path(), + node.data.type->get_name()); + } + return Status::OK(); + })); + + if (_output_as_raw_json) { + auto col_to = vectorized::ColumnString::create(); + col_to->reserve(nrows * 2); + vectorized::VectorBufferWriter write_buffer(*col_to.get()); + auto type = std::make_shared(); + for (size_t i = 0; i < nrows; ++i) { + type->to_string(container_variant, i, write_buffer); + write_buffer.commit(); + } + CHECK(variant.empty()); + variant.create_root(std::make_shared(), std::move(col_to)); + } else { + // TODO select v:b -> v.b / v.b.c but v.d maybe in v + // copy container variant to dst variant, todo avoid copy + variant.insert_range_from(container_variant, 0, nrows); + } + + // variant.set_num_rows(nrows); + _rows_read += nrows; + variant.finalize(); +#ifndef NDEBUG + variant.check_consistency(); +#endif + // clear data in nodes + RETURN_IF_ERROR(tranverse([&](SubstreamReaderTree::Node& node) { + node.data.column->clear(); + return Status::OK(); + })); + container->clear(); + if (_root_reader->column->is_nullable()) { + // fill nullmap + DCHECK(dst->is_nullable()); + vectorized::ColumnUInt8& dst_null_map = + assert_cast(*dst).get_null_map_column(); + vectorized::ColumnUInt8& src_null_map = + assert_cast(*_root_reader->column) + .get_null_map_column(); + dst_null_map.insert_range_from(src_null_map, 0, src_null_map.size()); + // clear nullmap and inner data + src_null_map.clear(); + 
assert_cast( + assert_cast(*_root_reader->column) + .get_nested_column()) + .clear_subcolumns_data(); + } else { + vectorized::ColumnObject& root_column = + assert_cast(*_root_reader->column); + root_column.clear_subcolumns_data(); + } + return Status::OK(); + } +}; + +// Extract from root column of variant, since root column of variant +// encodes sparse columns that are not materialized +class ExtractReader : public ColumnIterator { +public: + ExtractReader(const TabletColumn& col, std::unique_ptr&& root_reader) + : _col(col), _root_reader(std::move(root_reader)) {} + + Status init(const ColumnIteratorOptions& opts) override; + + Status seek_to_first() override; + + Status seek_to_ordinal(ordinal_t ord) override; + + Status next_batch(size_t* n, vectorized::MutableColumnPtr& dst, bool* has_null) override; + + Status read_by_rowids(const rowid_t* rowids, const size_t count, + vectorized::MutableColumnPtr& dst) override; + + ordinal_t get_current_ordinal() const override; + +private: + Status extract_to(vectorized::MutableColumnPtr& dst, size_t nrows); + + const TabletColumn& _col; + // may shared among different column iterators + std::unique_ptr _root_reader; +}; + +} // namespace segment_v2 +} // namespace doris diff --git a/be/src/olap/rowset/segment_v2/inverted_index_desc.cpp b/be/src/olap/rowset/segment_v2/inverted_index_desc.cpp index 4bdf44ead0..bf05b85751 100644 --- a/be/src/olap/rowset/segment_v2/inverted_index_desc.cpp +++ b/be/src/olap/rowset/segment_v2/inverted_index_desc.cpp @@ -27,30 +27,36 @@ const std::string segment_suffix = ".dat"; const std::string index_suffix = ".idx"; const std::string index_name_separator = "_"; -std::string InvertedIndexDescriptor::get_temporary_index_path(const std::string& segment_path, - uint32_t uuid) { +std::string InvertedIndexDescriptor::get_temporary_index_path( + const std::string& segment_path, uint32_t uuid, const std::string& index_suffix_path) { + std::string suffix = index_suffix_path.empty() ? 
"" : "@" + index_suffix_path; return StripSuffixString(segment_path, segment_suffix) + index_name_separator + - std::to_string(uuid); + std::to_string(uuid) + suffix; } std::string InvertedIndexDescriptor::get_index_file_name(const std::string& segment_path, - uint32_t uuid) { + uint32_t uuid, + const std::string& index_suffix_path) { + std::string suffix = index_suffix_path.empty() ? "" : "@" + index_suffix_path; return StripSuffixString(segment_path, segment_suffix) + index_name_separator + - std::to_string(uuid) + index_suffix; + std::to_string(uuid) + suffix + index_suffix; } -std::string InvertedIndexDescriptor::inverted_index_file_path(const string& rowset_dir, - const RowsetId& rowset_id, - int segment_id, int64_t index_id) { +std::string InvertedIndexDescriptor::inverted_index_file_path( + const string& rowset_dir, const RowsetId& rowset_id, int segment_id, int64_t index_id, + const std::string& index_suffix_path) { // {rowset_dir}/{schema_hash}/{rowset_id}_{seg_num}_{index_id}.idx - return fmt::format("{}/{}_{}_{}.idx", rowset_dir, rowset_id.to_string(), segment_id, index_id); + std::string suffix = index_suffix_path.empty() ? "" : "@" + index_suffix_path; + return fmt::format("{}/{}_{}_{}{}.idx", rowset_dir, rowset_id.to_string(), segment_id, index_id, + suffix); } std::string InvertedIndexDescriptor::local_inverted_index_path_segcompacted( const string& tablet_path, const RowsetId& rowset_id, int64_t begin, int64_t end, - int64_t index_id) { + int64_t index_id, const std::string& index_suffix_path) { // {root_path}/data/{shard_id}/{tablet_id}/{schema_hash}/{rowset_id}_{begin_seg}-{end_seg}_{index_id}.idx - return fmt::format("{}/{}_{}-{}_{}.idx", tablet_path, rowset_id.to_string(), begin, end, - index_id); + std::string suffix = index_suffix_path.empty() ? 
"" : "@" + index_suffix_path; + return fmt::format("{}/{}_{}-{}_{}{}.idx", tablet_path, rowset_id.to_string(), begin, end, + index_id, suffix); } } // namespace doris::segment_v2 \ No newline at end of file diff --git a/be/src/olap/rowset/segment_v2/inverted_index_desc.h b/be/src/olap/rowset/segment_v2/inverted_index_desc.h index 1cf4636d80..25c9913e21 100644 --- a/be/src/olap/rowset/segment_v2/inverted_index_desc.h +++ b/be/src/olap/rowset/segment_v2/inverted_index_desc.h @@ -28,20 +28,24 @@ namespace segment_v2 { class InvertedIndexDescriptor { public: - static std::string get_temporary_index_path(const std::string& segment_path, uint32_t uuid); - static std::string get_index_file_name(const std::string& path, uint32_t uuid); + static std::string get_temporary_index_path(const std::string& segment_path, uint32_t uuid, + const std::string& index_suffix_path); + static std::string get_index_file_name(const std::string& path, uint32_t uuid, + const std::string& index_suffix_path); static const std::string get_temporary_null_bitmap_file_name() { return "null_bitmap"; } static const std::string get_temporary_bkd_index_data_file_name() { return "bkd"; } static const std::string get_temporary_bkd_index_meta_file_name() { return "bkd_meta"; } static const std::string get_temporary_bkd_index_file_name() { return "bkd_index"; } static std::string inverted_index_file_path(const std::string& rowset_dir, const RowsetId& rowset_id, int segment_id, - int64_t index_id); + int64_t index_id, + const std::string& index_suffix_path); static std::string local_inverted_index_path_segcompacted(const std::string& tablet_path, const RowsetId& rowset_id, int64_t begin, int64_t end, - int64_t index_id); + int64_t index_id, + const std::string& index_suffix_path); }; } // namespace segment_v2 diff --git a/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp b/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp index e870680ef5..49f0365c2b 100644 --- 
a/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp +++ b/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp @@ -188,8 +188,8 @@ Status InvertedIndexReader::read_null_bitmap(InvertedIndexQueryCacheHandle* cach // try to get query bitmap result from cache and return immediately on cache hit io::Path path(_path); auto index_dir = path.parent_path(); - auto index_file_name = InvertedIndexDescriptor::get_index_file_name(path.filename(), - _index_meta.index_id()); + auto index_file_name = InvertedIndexDescriptor::get_index_file_name( + path.filename(), _index_meta.index_id(), _index_meta.get_index_suffix()); auto index_file_path = index_dir / index_file_name; InvertedIndexQueryCache::CacheKey cache_key { index_file_path, "", InvertedIndexQueryType::UNKNOWN_QUERY, "null_bitmap"}; @@ -251,8 +251,8 @@ Status FullTextIndexReader::query(OlapReaderStatistics* stats, RuntimeState* run io::Path path(_path); auto index_dir = path.parent_path(); - auto index_file_name = - InvertedIndexDescriptor::get_index_file_name(path.filename(), _index_meta.index_id()); + auto index_file_name = InvertedIndexDescriptor::get_index_file_name( + path.filename(), _index_meta.index_id(), _index_meta.get_index_suffix()); auto index_file_path = index_dir / index_file_name; InvertedIndexCtxSPtr inverted_index_ctx = std::make_shared(); inverted_index_ctx->parser_type = get_inverted_index_parser_type_from_string( @@ -525,8 +525,8 @@ Status StringTypeInvertedIndexReader::query(OlapReaderStatistics* stats, io::Path path(_path); auto index_dir = path.parent_path(); - auto index_file_name = - InvertedIndexDescriptor::get_index_file_name(path.filename(), _index_meta.index_id()); + auto index_file_name = InvertedIndexDescriptor::get_index_file_name( + path.filename(), _index_meta.index_id(), _index_meta.get_index_suffix()); auto index_file_path = index_dir / index_file_name; // try to get query bitmap result from cache and return immediately on cache hit @@ -642,8 +642,8 @@ 
BkdIndexReader::BkdIndexReader(io::FileSystemSPtr fs, const std::string& path, : InvertedIndexReader(fs, path, index_meta), _compoundReader(nullptr) { io::Path io_path(_path); auto index_dir = io_path.parent_path(); - auto index_file_name = InvertedIndexDescriptor::get_index_file_name(io_path.filename(), - index_meta->index_id()); + auto index_file_name = InvertedIndexDescriptor::get_index_file_name( + io_path.filename(), index_meta->index_id(), index_meta->get_index_suffix()); // check index file existence auto index_file = index_dir / index_file_name; diff --git a/be/src/olap/rowset/segment_v2/inverted_index_writer.cpp b/be/src/olap/rowset/segment_v2/inverted_index_writer.cpp index 5c53a91c3d..8dabf80dfb 100644 --- a/be/src/olap/rowset/segment_v2/inverted_index_writer.cpp +++ b/be/src/olap/rowset/segment_v2/inverted_index_writer.cpp @@ -117,7 +117,8 @@ public: if (config::enable_write_index_searcher_cache) { // open index searcher into cache auto index_file_name = InvertedIndexDescriptor::get_index_file_name( - _segment_file_name, _index_meta->index_id()); + _segment_file_name, _index_meta->index_id(), + _index_meta->get_index_suffix()); static_cast(InvertedIndexSearcherCache::instance()->insert(_fs, _directory, index_file_name)); } @@ -139,7 +140,8 @@ public: bool create = true; auto index_path = InvertedIndexDescriptor::get_temporary_index_path( - _directory + "/" + _segment_file_name, _index_meta->index_id()); + _directory + "/" + _segment_file_name, _index_meta->index_id(), + _index_meta->get_index_suffix()); // LOG(INFO) << "inverted index path: " << index_path; bool exists = false; @@ -429,8 +431,8 @@ public: int64_t file_size() const override { std::filesystem::path dir(_directory); dir /= _segment_file_name; - auto file_name = - InvertedIndexDescriptor::get_index_file_name(dir.string(), _index_meta->index_id()); + auto file_name = InvertedIndexDescriptor::get_index_file_name( + dir.string(), _index_meta->index_id(), _index_meta->get_index_suffix()); 
int64_t size = -1; auto st = _fs->file_size(file_name.c_str(), &size); if (!st.ok()) { @@ -465,7 +467,8 @@ public: // write bkd file if constexpr (field_is_numeric_type(field_type)) { auto index_path = InvertedIndexDescriptor::get_temporary_index_path( - _directory + "/" + _segment_file_name, _index_meta->index_id()); + _directory + "/" + _segment_file_name, _index_meta->index_id(), + _index_meta->get_index_suffix()); dir = DorisCompoundDirectory::getDirectory(_fs, index_path.c_str(), true); write_null_bitmap(null_bitmap_out, dir); _bkd_writer->max_doc_ = _rid; diff --git a/be/src/olap/rowset/segment_v2/segment.cpp b/be/src/olap/rowset/segment_v2/segment.cpp index a8c2c095e1..32e4683576 100644 --- a/be/src/olap/rowset/segment_v2/segment.cpp +++ b/be/src/olap/rowset/segment_v2/segment.cpp @@ -34,9 +34,11 @@ #include "olap/block_column_predicate.h" #include "olap/column_predicate.h" #include "olap/iterators.h" +#include "olap/olap_common.h" #include "olap/primary_key_index.h" #include "olap/rowset/rowset_reader_context.h" #include "olap/rowset/segment_v2/empty_segment_iterator.h" +#include "olap/rowset/segment_v2/hierarchical_data_reader.h" #include "olap/rowset/segment_v2/indexed_column_reader.h" #include "olap/rowset/segment_v2/page_io.h" #include "olap/rowset/segment_v2/page_pointer.h" @@ -48,6 +50,7 @@ #include "olap/tablet_schema.h" #include "olap/types.h" #include "olap/utils.h" +#include "runtime/define_primitive_type.h" #include "runtime/memory/mem_tracker.h" #include "runtime/query_context.h" #include "runtime/runtime_predicate.h" @@ -60,6 +63,8 @@ #include "vec/common/string_ref.h" #include "vec/data_types/data_type.h" #include "vec/data_types/data_type_factory.hpp" +#include "vec/data_types/data_type_nullable.h" +#include "vec/data_types/data_type_object.h" #include "vec/olap/vgeneric_iterators.h" namespace doris { @@ -117,11 +122,15 @@ Status Segment::new_iterator(SchemaSPtr schema, const StorageReadOptions& read_o if (_tablet_schema->num_columns() <= 
column_id) { continue; } + // TODO handle var path int32_t uid = read_options.tablet_schema->column(column_id).unique_id(); if (_column_readers.count(uid) < 1 || !_column_readers.at(uid)->has_zone_map()) { continue; } if (read_options.col_id_to_predicates.count(column_id) > 0 && + can_apply_predicate_safely(column_id, + read_options.col_id_to_predicates.at(column_id).get(), + *schema, read_options.io_ctx.reader_type) && !_column_readers.at(uid)->match_condition(entry.second.get())) { // any condition not satisfied, return. iter->reset(new EmptySegmentIterator(*schema)); @@ -133,12 +142,15 @@ Status Segment::new_iterator(SchemaSPtr schema, const StorageReadOptions& read_o auto query_ctx = read_options.runtime_state->get_query_ctx(); auto runtime_predicate = query_ctx->get_runtime_predicate().get_predictate(); if (runtime_predicate) { + // TODO handle var path int32_t uid = read_options.tablet_schema->column(runtime_predicate->column_id()).unique_id(); AndBlockColumnPredicate and_predicate; auto single_predicate = new SingleColumnBlockPredicate(runtime_predicate.get()); and_predicate.add_column_predicate(single_predicate); - if (!_column_readers.at(uid)->match_condition(&and_predicate)) { + if (can_apply_predicate_safely(runtime_predicate->column_id(), runtime_predicate.get(), + *schema, read_options.io_ctx.reader_type) && + !_column_readers.at(uid)->match_condition(&and_predicate)) { // any condition not satisfied, return. iter->reset(new EmptySegmentIterator(*schema)); read_options.stats->filtered_segment_number++; @@ -318,14 +330,42 @@ Status Segment::_load_index_impl() { }); } +// Return the storage datatype of related column to field. 
+// Return nullptr meaning there is no such storage information for this column +vectorized::DataTypePtr Segment::get_data_type_of(const Field& field, bool ignore_children) const { + // Path has higher priority + if (!field.path().empty()) { + auto node = _sub_column_tree.find_leaf(field.path()); + if (node) { + if (ignore_children || node->children.empty()) { + return node->data.file_column_type; + } + } + // it contains children or the column is missing in storage, so treat it as variant + return field.is_nullable() + ? vectorized::make_nullable(std::make_shared()) + : std::make_shared(); + } + // TODO support normal column type + return nullptr; +} Status Segment::_create_column_readers(const SegmentFooterPB& footer) { std::unordered_map column_id_to_footer_ordinal; - + std::unordered_map + column_path_to_footer_ordinal; for (uint32_t ordinal = 0; ordinal < footer.columns().size(); ++ordinal) { auto& column_pb = footer.columns(ordinal); - column_id_to_footer_ordinal.emplace(column_pb.unique_id(), ordinal); + if (column_pb.has_column_path_info()) { + // column path + vectorized::PathInData path; + path.from_protobuf(column_pb.column_path_info()); + column_path_to_footer_ordinal.emplace(path, ordinal); + } else { + // unique id + column_id_to_footer_ordinal.emplace(column_pb.unique_id(), ordinal); + } } - + // init by unique_id for (uint32_t ordinal = 0; ordinal < _tablet_schema->num_columns(); ++ordinal) { auto& column = _tablet_schema->column(ordinal); auto iter = column_id_to_footer_ordinal.find(column.unique_id()); @@ -341,6 +381,115 @@ Status Segment::_create_column_readers(const SegmentFooterPB& footer) { _file_reader, &reader)); _column_readers.emplace(column.unique_id(), std::move(reader)); } + + // init by column path + for (uint32_t ordinal = 0; ordinal < _tablet_schema->num_columns(); ++ordinal) { + auto& column = _tablet_schema->column(ordinal); + auto iter = column_path_to_footer_ordinal.find(column.path_info()); + if (iter == column_path_to_footer_ordinal.end()) {
continue; + } + ColumnReaderOptions opts; + opts.kept_in_memory = _tablet_schema->is_in_memory(); + std::unique_ptr reader; + RETURN_IF_ERROR(ColumnReader::create(opts, footer.columns(iter->second), footer.num_rows(), + _file_reader, &reader)); + _sub_column_tree.add( + iter->first, + SubcolumnReader {std::move(reader), + vectorized::DataTypeFactory::instance().create_data_type( + footer.columns(iter->second))}); + } + return Status::OK(); +} + +static Status new_default_iterator(const TabletColumn& tablet_column, + std::unique_ptr* iter) { + if (!tablet_column.has_default_value() && !tablet_column.is_nullable()) { + return Status::InternalError("invalid nonexistent column without default value."); + } + auto type_info = get_type_info(&tablet_column); + std::unique_ptr default_value_iter(new DefaultValueColumnIterator( + tablet_column.has_default_value(), tablet_column.default_value(), + tablet_column.is_nullable(), std::move(type_info), tablet_column.precision(), + tablet_column.frac())); + ColumnIteratorOptions iter_opts; + + RETURN_IF_ERROR(default_value_iter->init(iter_opts)); + *iter = std::move(default_value_iter); + return Status::OK(); +} + +Status Segment::new_column_iterator_with_path(const TabletColumn& tablet_column, + std::unique_ptr* iter, + StorageReadOptions* opt) { + vectorized::PathInData root_path; + if (tablet_column.path_info().empty()) { + // Missing path info, but need read the whole variant column + root_path = vectorized::PathInData(tablet_column.name_lower_case()); + } else { + root_path = vectorized::PathInData({tablet_column.path_info().get_parts()[0]}); + } + auto root = _sub_column_tree.find_leaf(root_path); + auto node = _sub_column_tree.find_exact(tablet_column.path_info()); + if (opt->io_ctx.reader_type == ReaderType::READER_ALTER_TABLE) { + CHECK(tablet_column.is_variant_type()); + if (node == nullptr) { + // No such variant column in this segment, get a default one + RETURN_IF_ERROR(new_default_iterator(tablet_column, iter)); + 
return Status::OK(); + } + bool output_as_raw_json = true; + // Alter table operation should read the whole variant column, since it is not aware of + // subcolumns of variant during the process of rewriting rowsets. + // This is slow, since it needs to read all sub columns and merge them into a single column + RETURN_IF_ERROR(HierarchicalDataReader::create(iter, node, root, output_as_raw_json)); + return Status::OK(); + } + + if (opt->io_ctx.reader_type != ReaderType::READER_QUERY) { + // Could be compaction etc., which reads flat leaf nodes' data + auto node = _sub_column_tree.find_leaf(tablet_column.path_info()); + if (!node) { + RETURN_IF_ERROR(new_default_iterator(tablet_column, iter)); + return Status::OK(); + } + ColumnIterator* it; + RETURN_IF_ERROR(node->data.reader->new_iterator(&it)); + iter->reset(it); + return Status::OK(); + } + + // Init iterators with extra path info. + // TODO If this segment does not contain any data corresponding to the related path, + // then we could optimize to generate a default iterator + // This file does not contain this column, so only read from the sparse column + // to avoid read amplification + if (node != nullptr && node->is_scalar() && node->children.empty()) { + // Directly read extracted columns + const auto* node = _sub_column_tree.find_leaf(tablet_column.path_info()); + ColumnIterator* it; + RETURN_IF_ERROR(node->data.reader->new_iterator(&it)); + iter->reset(it); + } else if (node != nullptr && !node->children.empty()) { + // Create reader with hierarchical data + RETURN_IF_ERROR(HierarchicalDataReader::create(iter, node, root)); + } else { + // If the file only contains columns `v.a` and `v` but the target path is `v.b`, only read and parse the root column + if (root == nullptr) { + // No such variant column in this segment, get a default one + RETURN_IF_ERROR(new_default_iterator(tablet_column, iter)); + return Status::OK(); + } + ColumnIterator* it; + RETURN_IF_ERROR(root->data.reader->new_iterator(&it)); + auto stream_iter = new
ExtractReader( + tablet_column, + std::make_unique(root->data.file_column_type->create_column(), + std::unique_ptr(it), + root->data.file_column_type)); + iter->reset(stream_iter); + } return Status::OK(); } @@ -352,36 +501,53 @@ Status Segment::_create_column_readers(const SegmentFooterPB& footer) { // but in the old schema column b's cid == 2 // but they are not the same column Status Segment::new_column_iterator(const TabletColumn& tablet_column, - std::unique_ptr* iter) { + std::unique_ptr* iter, + StorageReadOptions* opt) { + // init column iterator by path info + if (!tablet_column.path_info().empty() || tablet_column.is_variant_type()) { + return new_column_iterator_with_path(tablet_column, iter, opt); + } + // init default iterator if (_column_readers.count(tablet_column.unique_id()) < 1) { - if (!tablet_column.has_default_value() && !tablet_column.is_nullable()) { - return Status::InternalError("invalid nonexistent column without default value."); - } - auto type_info = get_type_info(&tablet_column); - std::unique_ptr default_value_iter( - new DefaultValueColumnIterator(tablet_column.has_default_value(), - tablet_column.default_value(), - tablet_column.is_nullable(), std::move(type_info), - tablet_column.precision(), tablet_column.frac())); - ColumnIteratorOptions iter_opts; - - RETURN_IF_ERROR(default_value_iter->init(iter_opts)); - *iter = std::move(default_value_iter); + RETURN_IF_ERROR(new_default_iterator(tablet_column, iter)); return Status::OK(); } + // init iterator by unique id ColumnIterator* it; RETURN_IF_ERROR(_column_readers.at(tablet_column.unique_id())->new_iterator(&it)); iter->reset(it); return Status::OK(); } +Status Segment::new_column_iterator(int32_t unique_id, std::unique_ptr* iter) { + ColumnIterator* it; + RETURN_IF_ERROR(_column_readers.at(unique_id)->new_iterator(&it)); + iter->reset(it); + return Status::OK(); +} + +ColumnReader* Segment::_get_column_reader(const TabletColumn& col) { + // init column iterator by path info + if 
(!col.path_info().empty() || col.is_variant_type()) { + auto node = _sub_column_tree.find_exact(col.path_info()); + if (node != nullptr) { + return node->data.reader.get(); + } + return nullptr; + } + auto col_unique_id = col.unique_id(); + if (_column_readers.count(col_unique_id) > 0) { + return _column_readers[col_unique_id].get(); + } + return nullptr; +} + Status Segment::new_bitmap_index_iterator(const TabletColumn& tablet_column, std::unique_ptr* iter) { - auto col_unique_id = tablet_column.unique_id(); - if (_column_readers.count(col_unique_id) > 0 && - _column_readers.at(col_unique_id)->has_bitmap_index()) { + ColumnReader* reader = _get_column_reader(tablet_column); + if (reader != nullptr && reader->has_bitmap_index()) { BitmapIndexIterator* it; - RETURN_IF_ERROR(_column_readers.at(col_unique_id)->new_bitmap_index_iterator(&it)); + RETURN_IF_ERROR(reader->new_bitmap_index_iterator(&it)); iter->reset(it); return Status::OK(); } @@ -392,10 +558,9 @@ Status Segment::new_inverted_index_iterator(const TabletColumn& tablet_column, const TabletIndex* index_meta, const StorageReadOptions& read_options, std::unique_ptr* iter) { - auto col_unique_id = tablet_column.unique_id(); - if (_column_readers.count(col_unique_id) > 0 && index_meta) { - RETURN_IF_ERROR(_column_readers.at(col_unique_id) - ->new_inverted_index_iterator(index_meta, read_options, iter)); + ColumnReader* reader = _get_column_reader(tablet_column); + if (reader != nullptr && index_meta) { + RETURN_IF_ERROR(reader->new_inverted_index_iterator(index_meta, read_options, iter)); return Status::OK(); } return Status::OK(); @@ -515,5 +680,21 @@ Status Segment::read_key_by_rowid(uint32_t row_id, std::string* key) { return Status::OK(); } +bool Segment::same_with_storage_type(int32_t cid, const Schema& schema, + bool ignore_children) const { + auto file_column_type = get_data_type_of(*schema.column(cid), ignore_children); + auto expected_type = Schema::get_data_type_ptr(*schema.column(cid)); +#ifndef 
NDEBUG + if (file_column_type && !file_column_type->equals(*expected_type)) { + VLOG_DEBUG << fmt::format("Get column {}, file column type {}, exepected type {}", + schema.column(cid)->name(), file_column_type->get_name(), + expected_type->get_name()); + } +#endif + bool same = + (!file_column_type) || (file_column_type && file_column_type->equals(*expected_type)); + return same; +} + } // namespace segment_v2 } // namespace doris diff --git a/be/src/olap/rowset/segment_v2/segment.h b/be/src/olap/rowset/segment_v2/segment.h index d24381a8fb..5dd003a1df 100644 --- a/be/src/olap/rowset/segment_v2/segment.h +++ b/be/src/olap/rowset/segment_v2/segment.h @@ -32,15 +32,23 @@ #include "common/status.h" // Status #include "io/fs/file_reader_writer_fwd.h" #include "io/fs/file_system.h" +#include "olap/field.h" #include "olap/olap_common.h" #include "olap/rowset/segment_v2/column_reader.h" // ColumnReader +#include "olap/rowset/segment_v2/hierarchical_data_reader.h" #include "olap/rowset/segment_v2/page_handle.h" #include "olap/schema.h" #include "olap/tablet_schema.h" #include "util/once.h" #include "util/slice.h" +#include "vec/columns/subcolumn_tree.h" +#include "vec/data_types/data_type.h" +#include "vec/data_types/data_type_nullable.h" namespace doris { +namespace vectorized { +class IDataType; +} class ShortKeyIndexDecoder; class Schema; @@ -71,7 +79,6 @@ public: RowsetId rowset_id, TabletSchemaSPtr tablet_schema, const io::FileReaderOptions& reader_options, std::shared_ptr* output); - ~Segment(); Status new_iterator(SchemaSPtr schema, const StorageReadOptions& read_options, @@ -84,7 +91,14 @@ public: uint32_t num_rows() const { return _num_rows; } Status new_column_iterator(const TabletColumn& tablet_column, - std::unique_ptr* iter); + std::unique_ptr* iter, + StorageReadOptions* opt = nullptr); + + Status new_column_iterator_with_path(const TabletColumn& tablet_column, + std::unique_ptr* iter, + StorageReadOptions* opt = nullptr); + + Status 
new_column_iterator(int32_t unique_id, std::unique_ptr* iter); Status new_bitmap_index_iterator(const TabletColumn& tablet_column, std::unique_ptr* iter); @@ -128,6 +142,40 @@ public: void remove_from_segment_cache() const; + // Get the inner file column's data type + // ignore_children set to false will treat the field as variant + // when it contains children with field paths. + // nullptr will be returned if the storage type does not contain such a column + std::shared_ptr get_data_type_of(const Field& filed, + bool ignore_children) const; + + // Check if the schema's read type equals the storage column type + bool same_with_storage_type(int32_t cid, const Schema& schema, bool ignore_children) const; + + // If the column in the segment has the same type as in the schema, then it is safe to apply the predicate + template + bool can_apply_predicate_safely(int cid, Predicate* pred, const Schema& schema, + ReaderType read_type) const { + const Field* col = schema.column(cid); + vectorized::DataTypePtr storage_column_type = + get_data_type_of(*col, read_type != ReaderType::READER_QUERY); + if (storage_column_type == nullptr) { + // Default column iterator + return true; + } + if (vectorized::WhichDataType(vectorized::remove_nullable(storage_column_type)) + .is_variant_type()) { + // A predicate should never be applied on a variant type + return false; + } + bool safe = + pred->can_do_apply_safely(storage_column_type->get_type_as_type_descriptor().type, + storage_column_type->is_nullable()); + // Currently only a variant column can lead to an unsafe predicate + CHECK(safe || col->type() == FieldType::OLAP_FIELD_TYPE_VARIANT); + return safe; + } + private: DISALLOW_COPY_AND_ASSIGN(Segment); Segment(uint32_t segment_id, RowsetId rowset_id, TabletSchemaSPtr tablet_schema); @@ -136,6 +184,7 @@ private: Status _parse_footer(SegmentFooterPB* footer); Status _create_column_readers(const SegmentFooterPB& footer); Status _load_pk_bloom_filter(); + ColumnReader* _get_column_reader(const TabletColumn& col); Status _load_index_impl(); @@ -158,6
+207,14 @@ private: // after this segment is generated. std::map> _column_readers; + // Init from ColumnMetaPB in SegmentFooterPB + // map column unique id ---> it's inner data type + std::map> _file_column_types; + + // Each node in the tree represents the sub column reader and type + // for variants. + SubcolumnColumnReaders _sub_column_tree; + // used to guarantee that short key index will be loaded at most once in a thread-safe way DorisCallOnce _load_index_once; // used to guarantee that primary key bloom filter will be loaded at most once in a thread-safe way diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp b/be/src/olap/rowset/segment_v2/segment_iterator.cpp index d0ffcfdd6d..4275370618 100644 --- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp +++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp @@ -34,6 +34,7 @@ #include "common/compiler_util.h" // IWYU pragma: keep #include "common/config.h" #include "common/consts.h" +#include "common/exception.h" #include "common/logging.h" #include "common/object_pool.h" #include "common/status.h" @@ -57,6 +58,7 @@ #include "olap/tablet_schema.h" #include "olap/types.h" #include "olap/utils.h" +#include "runtime/define_primitive_type.h" #include "runtime/query_context.h" #include "runtime/runtime_predicate.h" #include "runtime/runtime_state.h" @@ -68,21 +70,25 @@ #include "vec/columns/column.h" #include "vec/columns/column_const.h" #include "vec/columns/column_nullable.h" +#include "vec/columns/column_object.h" #include "vec/columns/column_string.h" #include "vec/columns/column_vector.h" #include "vec/columns/columns_number.h" #include "vec/common/assert_cast.h" +#include "vec/common/schema_util.h" #include "vec/common/string_ref.h" #include "vec/common/typeid_cast.h" #include "vec/core/column_with_type_and_name.h" #include "vec/core/field.h" #include "vec/core/types.h" +#include "vec/data_types/data_type.h" #include "vec/data_types/data_type_factory.hpp" #include 
"vec/data_types/data_type_number.h" #include "vec/exprs/vexpr.h" #include "vec/exprs/vexpr_context.h" #include "vec/exprs/vliteral.h" #include "vec/exprs/vslot_ref.h" +#include "vec/json/path_in_data.h" namespace doris { using namespace ErrorCode; @@ -278,7 +284,12 @@ Status SegmentIterator::_init_impl(const StorageReadOptions& opts) { _file_reader = _segment->_file_reader; _opts = opts; _col_predicates.clear(); + for (auto& predicate : opts.column_predicates) { + if (!_segment->can_apply_predicate_safely(predicate->column_id(), predicate, *_schema, + _opts.io_ctx.reader_type)) { + continue; + } if (predicate->need_to_clone()) { ColumnPredicate* cloned; predicate->clone(&cloned); @@ -291,8 +302,15 @@ Status SegmentIterator::_init_impl(const StorageReadOptions& opts) { _tablet_id = opts.tablet_id; // Read options will not change, so that just resize here _block_rowids.resize(_opts.block_row_max); - if (!opts.column_predicates_except_leafnode_of_andnode.empty()) { - _col_preds_except_leafnode_of_andnode = opts.column_predicates_except_leafnode_of_andnode; + + // compound predicates + _col_preds_except_leafnode_of_andnode.clear(); + for (auto& predicate : opts.column_predicates_except_leafnode_of_andnode) { + if (!_segment->can_apply_predicate_safely(predicate->column_id(), predicate, *_schema, + _opts.io_ctx.reader_type)) { + continue; + } + _col_preds_except_leafnode_of_andnode.push_back(predicate); } _remaining_conjunct_roots = opts.remaining_conjunct_roots; @@ -311,12 +329,26 @@ Status SegmentIterator::_init_impl(const StorageReadOptions& opts) { RETURN_IF_ERROR(init_iterators()); if (_char_type_idx.empty() && _char_type_idx_no_0.empty()) { + _is_char_type.resize(_schema->columns().size(), false); _vec_init_char_column_id(); } if (opts.output_columns != nullptr) { _output_columns = *(opts.output_columns); } + + _storage_name_and_type.resize(_schema->columns().size()); + for (int i = 0; i < _schema->columns().size(); ++i) { + const Field* col = _schema->column(i); 
+ if (col) { + auto storage_type = _segment->get_data_type_of( + *col, _opts.io_ctx.reader_type != ReaderType::READER_QUERY); + if (storage_type == nullptr) { + storage_type = vectorized::DataTypeFactory::instance().create_data_type(*col); + } + _storage_name_and_type[i] = std::make_pair(col->name(), storage_type); + } + } return Status::OK(); } @@ -501,9 +533,13 @@ Status SegmentIterator::_get_row_ranges_from_conditions(RowRanges* condition_row // bloom filter index only use CondColumn RowRanges bf_row_ranges = RowRanges::create_single(num_rows()); for (auto& cid : cids) { + DCHECK(_opts.col_id_to_predicates.count(cid) > 0); + if (!_segment->can_apply_predicate_safely(cid, _opts.col_id_to_predicates.at(cid).get(), + *_schema, _opts.io_ctx.reader_type)) { + continue; + } // get row ranges by bf index of this column, RowRanges column_bf_row_ranges = RowRanges::create_single(num_rows()); - DCHECK(_opts.col_id_to_predicates.count(cid) > 0); RETURN_IF_ERROR(_column_iterators[cid]->get_row_ranges_by_bloom_filter( _opts.col_id_to_predicates.at(cid).get(), &column_bf_row_ranges)); RowRanges::ranges_intersection(bf_row_ranges, column_bf_row_ranges, &bf_row_ranges); @@ -516,9 +552,13 @@ Status SegmentIterator::_get_row_ranges_from_conditions(RowRanges* condition_row RowRanges zone_map_row_ranges = RowRanges::create_single(num_rows()); // second filter data by zone map for (auto& cid : cids) { + DCHECK(_opts.col_id_to_predicates.count(cid) > 0); + if (!_segment->can_apply_predicate_safely(cid, _opts.col_id_to_predicates.at(cid).get(), + *_schema, _opts.io_ctx.reader_type)) { + continue; + } // get row ranges by zone map of this column, RowRanges column_row_ranges = RowRanges::create_single(num_rows()); - DCHECK(_opts.col_id_to_predicates.count(cid) > 0); RETURN_IF_ERROR(_column_iterators[cid]->get_row_ranges_by_zone_map( _opts.col_id_to_predicates.at(cid).get(), _opts.del_predicates_for_zone_map.count(cid) > 0 @@ -534,7 +574,9 @@ Status 
SegmentIterator::_get_row_ranges_from_conditions(RowRanges* condition_row if (_opts.use_topn_opt) { auto query_ctx = _opts.runtime_state->get_query_ctx(); runtime_predicate = query_ctx->get_runtime_predicate().get_predictate(); - if (runtime_predicate) { + if (runtime_predicate && _segment->can_apply_predicate_safely( + runtime_predicate->column_id(), runtime_predicate.get(), + *_schema, _opts.io_ctx.reader_type)) { AndBlockColumnPredicate and_predicate; auto single_predicate = new SingleColumnBlockPredicate(runtime_predicate.get()); and_predicate.add_column_predicate(single_predicate); @@ -790,8 +832,9 @@ Status SegmentIterator::_apply_bitmap_index_except_leafnode_of_andnode( Status SegmentIterator::_apply_inverted_index_except_leafnode_of_andnode( ColumnPredicate* pred, roaring::Roaring* output_result) { - RETURN_IF_ERROR(pred->evaluate(*_schema, _inverted_index_iterators[pred->column_id()].get(), - num_rows(), output_result)); + RETURN_IF_ERROR(pred->evaluate(_storage_name_and_type[pred->column_id()], + _inverted_index_iterators[pred->column_id()].get(), num_rows(), + output_result)); return Status::OK(); } @@ -924,8 +967,9 @@ Status SegmentIterator::_apply_inverted_index_on_column_predicate( bool need_remaining_after_evaluate = _column_has_fulltext_index(pred->column_id()) && PredicateTypeTraits::is_equal_or_list(pred->type()); roaring::Roaring bitmap = _row_bitmap; - Status res = pred->evaluate(*_schema, _inverted_index_iterators[pred->column_id()].get(), - num_rows(), &bitmap); + Status res = pred->evaluate(_storage_name_and_type[pred->column_id()], + _inverted_index_iterators[pred->column_id()].get(), num_rows(), + &bitmap); if (!res.ok()) { if (_downgrade_without_index(res, need_remaining_after_evaluate)) { remaining_predicates.emplace_back(pred); @@ -1168,7 +1212,7 @@ Status SegmentIterator::_init_return_column_iterators() { if (_column_iterators[cid] == nullptr) { RETURN_IF_ERROR(_segment->new_column_iterator(_opts.tablet_schema->column(cid), - 
&_column_iterators[cid])); + &_column_iterators[cid], &_opts)); ColumnIteratorOptions iter_opts { .use_page_cache = _opts.use_page_cache, // If the col is predicate column, then should read the last page to check @@ -1202,12 +1246,11 @@ Status SegmentIterator::_init_inverted_index_iterators() { return Status::OK(); } for (auto cid : _schema->column_ids()) { - int32_t unique_id = _opts.tablet_schema->column(cid).unique_id(); if (_inverted_index_iterators[cid] == nullptr) { RETURN_IF_ERROR(_segment->new_inverted_index_iterator( _opts.tablet_schema->column(cid), - _opts.tablet_schema->get_inverted_index(unique_id), _opts, - &_inverted_index_iterators[cid])); + _opts.tablet_schema->get_inverted_index(_opts.tablet_schema->column(cid)), + _opts, &_inverted_index_iterators[cid])); } } return Status::OK(); @@ -1599,6 +1642,11 @@ Status SegmentIterator::_vec_init_lazy_materialization() { bool SegmentIterator::_can_evaluated_by_vectorized(ColumnPredicate* predicate) { auto cid = predicate->column_id(); FieldType field_type = _schema->column(cid)->type(); + if (field_type == FieldType::OLAP_FIELD_TYPE_VARIANT) { + // Use variant cast dst type + field_type = TabletColumn::get_field_type_by_type( + _opts.target_cast_type_for_variants[_schema->column(cid)->name()]); + } switch (predicate->type()) { case PredicateType::EQ: case PredicateType::NE: @@ -1654,6 +1702,9 @@ void SegmentIterator::_vec_init_char_column_id() { _char_type_idx_no_0.emplace_back(i); } } + if (column_desc->type() == FieldType::OLAP_FIELD_TYPE_CHAR) { + _is_char_type[cid] = true; + } } } @@ -1700,19 +1751,35 @@ void SegmentIterator::_init_current_block( for (size_t i = 0; i < _schema->num_column_ids(); i++) { auto cid = _schema->column_id(i); auto column_desc = _schema->column(cid); - // the column in block must clear() here to insert new data - if (_is_pred_column[cid] || - i >= block->columns()) { //todo(wb) maybe we can release it after output block - current_columns[cid]->clear(); - } else { // 
non-predicate column - current_columns[cid] = std::move(*block->get_by_position(i).column).mutate(); - - if (column_desc->type() == FieldType::OLAP_FIELD_TYPE_DATE) { - current_columns[cid]->set_date_type(); - } else if (column_desc->type() == FieldType::OLAP_FIELD_TYPE_DATETIME) { - current_columns[cid]->set_datetime_type(); - } + if (!_is_pred_column[cid] && + !_segment->same_with_storage_type( + cid, *_schema, _opts.io_ctx.reader_type != ReaderType::READER_QUERY)) { + // The storage layer type is different from schema needed type, so we use storage + // type to read columns instead of schema type for safety + auto file_column_type = _storage_name_and_type[cid].second; + VLOG_DEBUG << fmt::format( + "Recreate column with expected type {}, file column type {}, col_name {}, " + "col_path {}", + block->get_by_position(i).type->get_name(), file_column_type->get_name(), + column_desc->name(), column_desc->path().get_path()); + // TODO reuse + current_columns[cid] = file_column_type->create_column(); current_columns[cid]->reserve(_opts.block_row_max); + } else { + // the column in block must clear() here to insert new data + if (_is_pred_column[cid] || + i >= block->columns()) { //todo(wb) maybe we can release it after output block + current_columns[cid]->clear(); + } else { // non-predicate column + current_columns[cid] = std::move(*block->get_by_position(i).column).mutate(); + + if (column_desc->type() == FieldType::OLAP_FIELD_TYPE_DATE) { + current_columns[cid]->set_date_type(); + } else if (column_desc->type() == FieldType::OLAP_FIELD_TYPE_DATETIME) { + current_columns[cid]->set_datetime_type(); + } + current_columns[cid]->reserve(_opts.block_row_max); + } } } } @@ -1959,6 +2026,49 @@ Status SegmentIterator::next_batch(vectorized::Block* block) { return status; } +Status SegmentIterator::_convert_to_expected_type(const std::vector& col_ids) { + for (ColumnId i : col_ids) { + if (_current_return_columns[i] == nullptr || _converted_column_ids[i] || + 
_is_pred_column[i]) { + continue; + } + if (!_segment->same_with_storage_type( + i, *_schema, _opts.io_ctx.reader_type != ReaderType::READER_QUERY)) { + const Field* field_type = _schema->column(i); + vectorized::DataTypePtr expected_type = Schema::get_data_type_ptr(*field_type); + vectorized::DataTypePtr file_column_type = _storage_name_and_type[i].second; + vectorized::ColumnPtr expected; + vectorized::ColumnPtr original = + _current_return_columns[i]->assume_mutable()->get_ptr(); + RETURN_IF_ERROR(vectorized::schema_util::cast_column({original, file_column_type, ""}, + expected_type, &expected)); + _current_return_columns[i] = expected->assume_mutable(); + _converted_column_ids[i] = 1; + VLOG_DEBUG << fmt::format("Convert {} fom file column type {} to {}, num_rows {}", + field_type->path().get_path(), file_column_type->get_name(), + expected_type->get_name(), + _current_return_columns[i]->size()); + } + } + return Status::OK(); +} + +Status SegmentIterator::copy_column_data_by_selector(vectorized::IColumn* input_col_ptr, + vectorized::MutableColumnPtr& output_col, + uint16_t* sel_rowid_idx, uint16_t select_size, + size_t batch_size) { + output_col->reserve(batch_size); + + // adapt for outer join change column to nullable + if (output_col->is_nullable() && !input_col_ptr->is_nullable()) { + auto col_ptr_nullable = reinterpret_cast(output_col.get()); + col_ptr_nullable->get_null_map_column().insert_many_defaults(select_size); + output_col = col_ptr_nullable->get_nested_column_ptr(); + } + + return input_col_ptr->filter_by_selector(sel_rowid_idx, select_size, output_col); +} + Status SegmentIterator::_next_batch_internal(vectorized::Block* block) { bool is_mem_reuse = block->mem_reuse(); DCHECK(is_mem_reuse); @@ -1971,13 +2081,20 @@ Status SegmentIterator::_next_batch_internal(vectorized::Block* block) { _block_rowids.resize(_opts.block_row_max); } _current_return_columns.resize(_schema->columns().size()); + _converted_column_ids.resize(_schema->columns().size(), 
0); for (size_t i = 0; i < _schema->num_column_ids(); i++) { auto cid = _schema->column_id(i); auto column_desc = _schema->column(cid); if (_is_pred_column[cid]) { - RETURN_IF_CATCH_EXCEPTION(_current_return_columns[cid] = - Schema::get_predicate_column_ptr( - *column_desc, _opts.io_ctx.reader_type)); + auto storage_column_type = _storage_name_and_type[cid].second; + // Char type is special , since char type's computational datatype is same with string, + // both are DataTypeString, but DataTypeString only return FieldType::OLAP_FIELD_TYPE_STRING + // in get_storage_field_type. + RETURN_IF_CATCH_EXCEPTION( + _current_return_columns[cid] = Schema::get_predicate_column_ptr( + _is_char_type[cid] ? FieldType::OLAP_FIELD_TYPE_CHAR + : storage_column_type->get_storage_field_type(), + storage_column_type->is_nullable(), _opts.io_ctx.reader_type)); _current_return_columns[cid]->set_rowset_segment_id( {_segment->rowset_id(), _segment->id()}); _current_return_columns[cid]->reserve(_opts.block_row_max); @@ -1995,8 +2112,8 @@ Status SegmentIterator::_next_batch_internal(vectorized::Block* block) { } } } - _init_current_block(block, _current_return_columns); + _converted_column_ids.assign(_schema->columns().size(), 0); _current_batch_rows_read = 0; uint32_t nrows_read_limit = _opts.block_row_max; @@ -2019,6 +2136,8 @@ Status SegmentIterator::_next_batch_internal(vectorized::Block* block) { _opts.stats->raw_rows_read += _current_batch_rows_read; if (_current_batch_rows_read == 0) { + // Convert all columns in _current_return_columns to schema column + RETURN_IF_ERROR(_convert_to_expected_type(_schema->column_ids())); for (int i = 0; i < block->columns(); i++) { auto cid = _schema->column_id(i); // todo(wb) abstract make column where @@ -2031,6 +2150,8 @@ Status SegmentIterator::_next_batch_internal(vectorized::Block* block) { } if (!_is_need_vec_eval && !_is_need_short_eval && !_is_need_expr_eval) { + RETURN_IF_ERROR(_convert_to_expected_type(_first_read_column_ids)); + 
RETURN_IF_ERROR(_convert_to_expected_type(_non_predicate_columns)); _output_non_pred_columns(block); _output_index_result_column(nullptr, 0, block); } else { @@ -2070,6 +2191,7 @@ Status SegmentIterator::_next_batch_internal(vectorized::Block* block) { _second_read_column_ids.end()) { _replace_version_col(selected_size); } + RETURN_IF_ERROR(_convert_to_expected_type(_second_read_column_ids)); for (auto cid : _second_read_column_ids) { auto loc = _schema_block_id_map[cid]; block->replace_by_position(loc, @@ -2099,6 +2221,7 @@ Status SegmentIterator::_next_batch_internal(vectorized::Block* block) { } } } else if (_is_need_expr_eval) { + RETURN_IF_ERROR(_convert_to_expected_type(_second_read_column_ids)); for (auto cid : _second_read_column_ids) { auto loc = _schema_block_id_map[cid]; block->replace_by_position(loc, std::move(_current_return_columns[cid])); @@ -2106,6 +2229,7 @@ Status SegmentIterator::_next_batch_internal(vectorized::Block* block) { } } else if (_is_need_expr_eval) { DCHECK(!_first_read_column_ids.empty()); + RETURN_IF_ERROR(_convert_to_expected_type(_first_read_column_ids)); // first read all rows are insert block, initialize sel_rowid_idx to all rows. 
for (auto cid : _first_read_column_ids) { auto loc = _schema_block_id_map[cid]; @@ -2151,7 +2275,6 @@ Status SegmentIterator::_next_batch_internal(vectorized::Block* block) { } return Status::OK(); } - // step4: read non_predicate column if (selected_size > 0) { RETURN_IF_ERROR(_read_columns_by_rowids(_non_predicate_columns, _block_rowids, @@ -2163,6 +2286,7 @@ Status SegmentIterator::_next_batch_internal(vectorized::Block* block) { } } + RETURN_IF_ERROR(_convert_to_expected_type(_non_predicate_columns)); // step5: output columns _output_non_pred_columns(block); @@ -2174,6 +2298,16 @@ Status SegmentIterator::_next_batch_internal(vectorized::Block* block) { // shrink char_type suffix zero data block->shrink_char_type_column_suffix_zero(_char_type_idx); +#ifndef NDEBUG + size_t rows = block->rows(); + for (const auto& entry : *block) { + if (entry.column->size() != rows) { + throw doris::Exception(ErrorCode::INTERNAL_ERROR, "unmatched size {}, expected {}", + entry.column->size(), rows); + } + } +#endif + if (UNLIKELY(_estimate_row_size) && block->rows() > 0) { _update_max_row(block); } @@ -2181,6 +2315,9 @@ Status SegmentIterator::_next_batch_internal(vectorized::Block* block) { // reverse block row order if (_opts.read_orderby_key_reverse) { size_t num_rows = block->rows(); + if (num_rows == 0) { + return Status::OK(); + } size_t num_columns = block->columns(); vectorized::IColumn::Permutation permutation; for (size_t i = 0; i < num_rows; ++i) permutation.emplace_back(num_rows - 1 - i); @@ -2331,9 +2468,10 @@ void SegmentIterator::_convert_dict_code_for_predicate_if_necessary_impl( void SegmentIterator::_update_max_row(const vectorized::Block* block) { _estimate_row_size = false; auto avg_row_size = block->bytes() / block->rows(); - - int block_row_max = config::doris_scan_block_max_mb / avg_row_size; - _opts.block_row_max = std::min(block_row_max, _opts.block_row_max); + if (avg_row_size > 0) { + int block_row_max = config::doris_scan_block_max_mb / avg_row_size; 
+ _opts.block_row_max = std::min(block_row_max, _opts.block_row_max); + } } Status SegmentIterator::current_block_row_locations(std::vector* block_row_locations) { diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.h b/be/src/olap/rowset/segment_v2/segment_iterator.h index 219ca61ee1..29f6ab6b5c 100644 --- a/be/src/olap/rowset/segment_v2/segment_iterator.h +++ b/be/src/olap/rowset/segment_v2/segment_iterator.h @@ -46,7 +46,10 @@ #include "util/runtime_profile.h" #include "util/slice.h" #include "vec/columns/column.h" +#include "vec/common/schema_util.h" #include "vec/core/block.h" +#include "vec/core/column_with_type_and_name.h" +#include "vec/core/columns_with_type_and_name.h" #include "vec/data_types/data_type.h" namespace doris { @@ -225,6 +228,11 @@ private: uint16_t* sel_rowid_idx, size_t select_size, vectorized::MutableColumns* mutable_columns); + Status copy_column_data_by_selector(vectorized::IColumn* input_col_ptr, + vectorized::MutableColumnPtr& output_col, + uint16_t* sel_rowid_idx, uint16_t select_size, + size_t batch_size); + template [[nodiscard]] Status _output_column_by_sel_idx(vectorized::Block* block, const Container& column_ids, @@ -232,9 +240,34 @@ private: SCOPED_RAW_TIMER(&_opts.stats->output_col_ns); for (auto cid : column_ids) { int block_cid = _schema_block_id_map[cid]; - RETURN_IF_ERROR(block->copy_column_data_to_block(_current_return_columns[cid].get(), - sel_rowid_idx, select_size, block_cid, + // Only the additional delete filter condition needs to materialize its column at the end of the block + // We should not materialize columns that the query engine does not need. So here just return OK. + // Eg: + // `delete from table where a = 10;` + // `select b from table;` + // column a is only effective in the segment iterator; the block from the query engine only contains the b column. 
+ // so the `block_cid >= data.size()` is true + if (block_cid >= block->columns()) { + continue; + } + vectorized::DataTypePtr storage_type = + _segment->get_data_type_of(*_schema->column(cid), false); + if (storage_type && !storage_type->equals(*block->get_by_position(block_cid).type)) { + // Do additional cast + vectorized::MutableColumnPtr tmp = storage_type->create_column(); + RETURN_IF_ERROR(copy_column_data_by_selector(_current_return_columns[cid].get(), + tmp, sel_rowid_idx, select_size, _opts.block_row_max)); + RETURN_IF_ERROR(vectorized::schema_util::cast_column( + {tmp->get_ptr(), storage_type, ""}, block->get_by_position(block_cid).type, + &block->get_by_position(block_cid).column)); + } else { + vectorized::MutableColumnPtr output_column = + block->get_by_position(block_cid).column->assume_mutable(); + RETURN_IF_ERROR(copy_column_data_by_selector(_current_return_columns[cid].get(), + output_column, sel_rowid_idx, + select_size, _opts.block_row_max)); + } } return Status::OK(); } @@ -328,11 +361,16 @@ private: return 0; } + Status _convert_to_expected_type(const std::vector& col_ids); + class BitmapRangeIterator; class BackwardBitmapRangeIterator; std::shared_ptr _segment; + // read schema from scanner SchemaSPtr _schema; + // storage type schema related to _schema, since column in segment may be different with type in _schema + std::vector _storage_name_and_type; // vector idx -> column iterarator std::vector> _column_iterators; std::vector> _bitmap_index_iterators; @@ -379,6 +417,7 @@ private: std::vector _first_read_column_ids; std::vector _second_read_column_ids; std::vector _columns_to_filter; + std::vector _converted_column_ids; std::vector _schema_block_id_map; // map from schema column id to column idx in Block // the actual init process is delayed to the first call to next_batch() @@ -418,6 +457,7 @@ private: // char_type or array type columns cid std::vector _char_type_idx; std::vector _char_type_idx_no_0; + std::vector _is_char_type; // 
number of rows read in the current batch uint32_t _current_batch_rows_read = 0; @@ -432,6 +472,8 @@ private: bool _record_rowids = false; int32_t _tablet_id = 0; std::set _output_columns; + + std::unique_ptr _path_reader; }; } // namespace segment_v2 diff --git a/be/src/olap/rowset/segment_v2/segment_writer.cpp b/be/src/olap/rowset/segment_v2/segment_writer.cpp index 90663090f0..ac70ef8c1f 100644 --- a/be/src/olap/rowset/segment_v2/segment_writer.cpp +++ b/be/src/olap/rowset/segment_v2/segment_writer.cpp @@ -139,7 +139,9 @@ void SegmentWriter::init_column_meta(ColumnMetaPB* meta, uint32_t column_id, meta->set_default_value(column.default_value()); meta->set_precision(column.precision()); meta->set_frac(column.frac()); - column.path_info().to_protobuf(meta->mutable_column_path_info(), column.parent_unique_id()); + if (!column.path_info().empty()) { + column.path_info().to_protobuf(meta->mutable_column_path_info(), column.parent_unique_id()); + } meta->set_unique_id(column.unique_id()); for (uint32_t i = 0; i < column.get_subtype_count(); ++i) { init_column_meta(meta->add_children_columns(), column_id, column.get_sub_column(i), @@ -199,7 +201,12 @@ Status SegmentWriter::init(const std::vector& col_ids, bool has_key) { skip_inverted_index = true; } // indexes for this column - opts.indexes = _tablet_schema->get_indexes_for_column(column.unique_id()); + opts.indexes = _tablet_schema->get_indexes_for_column(column); + if (column.is_variant_type() || (column.is_extracted_column() && column.is_jsonb_type()) || + (column.is_extracted_column() && column.is_array_type())) { + // variant and jsonb type skip write index + opts.indexes.clear(); + } for (auto index : opts.indexes) { if (!skip_inverted_index && index && index->index_type() == IndexType::INVERTED) { opts.inverted_index = index; diff --git a/be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp b/be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp index 4b0df03d35..91f890b7f9 100644 --- 
a/be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp +++ b/be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp @@ -106,12 +106,18 @@ VerticalSegmentWriter::~VerticalSegmentWriter() { void VerticalSegmentWriter::_init_column_meta(ColumnMetaPB* meta, uint32_t column_id, const TabletColumn& column) { meta->set_column_id(column_id); - meta->set_unique_id(column.unique_id()); meta->set_type(int(column.type())); meta->set_length(column.length()); meta->set_encoding(DEFAULT_ENCODING); meta->set_compression(_opts.compression_type); meta->set_is_nullable(column.is_nullable()); + meta->set_default_value(column.default_value()); + meta->set_precision(column.precision()); + meta->set_frac(column.frac()); + if (!column.path_info().empty()) { + column.path_info().to_protobuf(meta->mutable_column_path_info(), column.parent_unique_id()); + } + meta->set_unique_id(column.unique_id()); for (uint32_t i = 0; i < column.get_subtype_count(); ++i) { _init_column_meta(meta->add_children_columns(), column_id, column.get_sub_column(i)); } @@ -148,7 +154,12 @@ Status VerticalSegmentWriter::_create_column_writer(uint32_t cid, const TabletCo skip_inverted_index = true; } // indexes for this column - opts.indexes = _tablet_schema->get_indexes_for_column(column.unique_id()); + opts.indexes = _tablet_schema->get_indexes_for_column(column); + if (column.is_variant_type() || (column.is_extracted_column() && column.is_jsonb_type()) || + (column.is_extracted_column() && column.is_array_type())) { + // variant and jsonb type skip write index + opts.indexes.clear(); + } for (auto index : opts.indexes) { if (!skip_inverted_index && index && index->index_type() == IndexType::INVERTED) { opts.inverted_index = index; @@ -156,57 +167,31 @@ Status VerticalSegmentWriter::_create_column_writer(uint32_t cid, const TabletCo break; } } - if (column.type() == FieldType::OLAP_FIELD_TYPE_STRUCT) { - opts.need_zone_map = false; - if (opts.need_bloom_filter) { - return Status::NotSupported("Do not support 
bloom filter for struct type"); - } - if (opts.need_bitmap_index) { - return Status::NotSupported("Do not support bitmap index for struct type"); - } - } - if (column.type() == FieldType::OLAP_FIELD_TYPE_ARRAY) { - opts.need_zone_map = false; - if (opts.need_bloom_filter) { - return Status::NotSupported("Do not support bloom filter for array type"); - } - if (opts.need_bitmap_index) { - return Status::NotSupported("Do not support bitmap index for array type"); - } - } - if (column.type() == FieldType::OLAP_FIELD_TYPE_JSONB) { - opts.need_zone_map = false; - if (opts.need_bloom_filter) { - return Status::NotSupported("Do not support bloom filter for jsonb type"); - } - if (opts.need_bitmap_index) { - return Status::NotSupported("Do not support bitmap index for jsonb type"); - } - } - if (column.type() == FieldType::OLAP_FIELD_TYPE_AGG_STATE) { - opts.need_zone_map = false; - if (opts.need_bloom_filter) { - return Status::NotSupported("Do not support bloom filter for agg_state type"); - } - if (opts.need_bitmap_index) { - return Status::NotSupported("Do not support bitmap index for agg_state type"); - } - } - if (column.type() == FieldType::OLAP_FIELD_TYPE_MAP) { - opts.need_zone_map = false; - if (opts.need_bloom_filter) { - return Status::NotSupported("Do not support bloom filter for map type"); - } - if (opts.need_bitmap_index) { - return Status::NotSupported("Do not support bitmap index for map type"); - } + +#define CHECK_FIELD_TYPE(TYPE, type_name) \ + if (column.type() == FieldType::OLAP_FIELD_TYPE_##TYPE) { \ + opts.need_zone_map = false; \ + if (opts.need_bloom_filter) { \ + return Status::NotSupported("Do not support bloom filter for " type_name " type"); \ + } \ + if (opts.need_bitmap_index) { \ + return Status::NotSupported("Do not support bitmap index for " type_name " type"); \ + } \ } + CHECK_FIELD_TYPE(STRUCT, "struct") + CHECK_FIELD_TYPE(ARRAY, "array") + CHECK_FIELD_TYPE(JSONB, "jsonb") + CHECK_FIELD_TYPE(AGG_STATE, "agg_state") + 
CHECK_FIELD_TYPE(MAP, "map") + CHECK_FIELD_TYPE(VARIANT, "variant") + +#undef CHECK_FIELD_TYPE + if (column.is_row_store_column()) { // smaller page size for row store column opts.data_page_size = config::row_column_page_size; } - std::unique_ptr writer; RETURN_IF_ERROR(ColumnWriter::create(opts, &column, _file_writer, &writer)); RETURN_IF_ERROR(writer->init()); diff --git a/be/src/olap/rowset_builder.cpp b/be/src/olap/rowset_builder.cpp index 219d344f62..395d37f715 100644 --- a/be/src/olap/rowset_builder.cpp +++ b/be/src/olap/rowset_builder.cpp @@ -194,6 +194,7 @@ Status RowsetBuilder::init() { context.rowset_state = PREPARED; context.segments_overlap = OVERLAPPING; context.tablet_schema = _tablet_schema; + context.original_tablet_schema = _tablet_schema; context.newest_write_timestamp = UnixSeconds(); context.tablet_id = _tablet->tablet_id(); context.tablet = _tablet; diff --git a/be/src/olap/schema.cpp b/be/src/olap/schema.cpp index c59f49cb3b..5cfa1d5e38 100644 --- a/be/src/olap/schema.cpp +++ b/be/src/olap/schema.cpp @@ -25,6 +25,7 @@ #include #include "common/config.h" +#include "olap/olap_common.h" #include "runtime/define_primitive_type.h" #include "util/trace.h" #include "vec/columns/column_array.h" @@ -130,10 +131,11 @@ vectorized::IColumn::MutablePtr Schema::get_column_by_field(const Field& field) return get_data_type_ptr(field)->create_column(); } -vectorized::IColumn::MutablePtr Schema::get_predicate_column_ptr(const Field& field, +vectorized::IColumn::MutablePtr Schema::get_predicate_column_ptr(const FieldType& type, + bool is_nullable, const ReaderType reader_type) { vectorized::IColumn::MutablePtr ptr = nullptr; - switch (field.type()) { + switch (type) { case FieldType::OLAP_FIELD_TYPE_BOOL: ptr = doris::vectorized::PredicateColumnType::create(); break; @@ -172,8 +174,7 @@ vectorized::IColumn::MutablePtr Schema::get_predicate_column_ptr(const Field& fi break; case FieldType::OLAP_FIELD_TYPE_CHAR: if (config::enable_low_cardinality_optimize && 
reader_type == ReaderType::READER_QUERY) { - ptr = doris::vectorized::ColumnDictionary::create( - field.type()); + ptr = doris::vectorized::ColumnDictionary::create(type); } else { ptr = doris::vectorized::PredicateColumnType::create(); } @@ -182,8 +183,7 @@ vectorized::IColumn::MutablePtr Schema::get_predicate_column_ptr(const Field& fi case FieldType::OLAP_FIELD_TYPE_STRING: case FieldType::OLAP_FIELD_TYPE_JSONB: if (config::enable_low_cardinality_optimize && reader_type == ReaderType::READER_QUERY) { - ptr = doris::vectorized::ColumnDictionary::create( - field.type()); + ptr = doris::vectorized::ColumnDictionary::create(type); } else { ptr = doris::vectorized::PredicateColumnType::create(); } @@ -210,12 +210,12 @@ vectorized::IColumn::MutablePtr Schema::get_predicate_column_ptr(const Field& fi ptr = doris::vectorized::PredicateColumnType::create(); break; default: - throw Exception(ErrorCode::SCHEMA_SCHEMA_FIELD_INVALID, - fmt::format("Unexpected type when choosing predicate column, type={}", - int(field.type()))); + throw Exception( + ErrorCode::SCHEMA_SCHEMA_FIELD_INVALID, + fmt::format("Unexpected type when choosing predicate column, type={}", int(type))); } - if (field.is_nullable()) { + if (is_nullable) { return doris::vectorized::ColumnNullable::create(std::move(ptr), doris::vectorized::ColumnUInt8::create()); } diff --git a/be/src/olap/schema.h b/be/src/olap/schema.h index ce5c6705e7..3f2c2792b1 100644 --- a/be/src/olap/schema.h +++ b/be/src/olap/schema.h @@ -147,7 +147,8 @@ public: static vectorized::IColumn::MutablePtr get_column_by_field(const Field& field); - static vectorized::IColumn::MutablePtr get_predicate_column_ptr(const Field& field, + static vectorized::IColumn::MutablePtr get_predicate_column_ptr(const FieldType& type, + bool is_nullable, const ReaderType reader_type); const std::vector& columns() const { return _cols; } diff --git a/be/src/olap/schema_change.cpp b/be/src/olap/schema_change.cpp index 9d0e23cb97..e29e1f277d 100644 --- 
a/be/src/olap/schema_change.cpp +++ b/be/src/olap/schema_change.cpp @@ -606,6 +606,7 @@ Result> VSchemaChangeWithSorting: context.rowset_state = VISIBLE; context.segments_overlap = segments_overlap; context.tablet_schema = new_tablet->tablet_schema(); + context.original_tablet_schema = new_tablet->tablet_schema(); context.newest_write_timestamp = newest_write_timestamp; context.write_type = DataWriteType::TYPE_SCHEMA_CHANGE; std::unique_ptr rowset_writer; @@ -1110,6 +1111,7 @@ Status SchemaChangeHandler::_convert_historical_rowsets(const SchemaChangeParams context.rowset_state = VISIBLE; context.segments_overlap = rs_reader->rowset()->rowset_meta()->segments_overlap(); context.tablet_schema = new_tablet->tablet_schema(); + context.original_tablet_schema = new_tablet->tablet_schema(); context.newest_write_timestamp = rs_reader->newest_write_timestamp(); context.fs = rs_reader->rowset()->rowset_meta()->fs(); context.write_type = DataWriteType::TYPE_SCHEMA_CHANGE; @@ -1300,8 +1302,8 @@ Status SchemaChangeHandler::_parse_request(const SchemaChangeParams& sc_params, column_new.length() != column_old.length() || column_new.is_bf_column() != column_old.is_bf_column() || column_new.has_bitmap_index() != column_old.has_bitmap_index() || - new_tablet_schema->has_inverted_index(column_new.unique_id()) != - base_tablet_schema->has_inverted_index(column_old.unique_id())) { + new_tablet_schema->has_inverted_index(column_new) != + base_tablet_schema->has_inverted_index(column_old)) { *sc_directly = true; return Status::OK(); } diff --git a/be/src/olap/tablet.cpp b/be/src/olap/tablet.cpp index e2ed1bb88d..fad40012b0 100644 --- a/be/src/olap/tablet.cpp +++ b/be/src/olap/tablet.cpp @@ -641,9 +641,7 @@ TabletSchemaSPtr Tablet::tablet_schema_with_merged_max_schema_version( std::vector schemas; std::transform(rowset_metas.begin(), rowset_metas.end(), std::back_inserter(schemas), [](const RowsetMetaSharedPtr& rs_meta) { return rs_meta->tablet_schema(); }); - target_schema = 
std::make_shared(); - // TODO(lhy) maybe slow? - vectorized::schema_util::get_least_common_schema(schemas, target_schema); + target_schema = vectorized::schema_util::get_least_common_schema(schemas, nullptr); VLOG_DEBUG << "dump schema: " << target_schema->dump_structure(); } return target_schema; @@ -1342,7 +1340,7 @@ std::vector Tablet::pick_candidate_rowsets_to_build_inverted_in std::shared_lock rlock(_meta_lock); auto has_alter_inverted_index = [&](RowsetSharedPtr rowset) -> bool { for (const auto& index_id : alter_index_uids) { - if (rowset->tablet_schema()->has_inverted_index_with_index_id(index_id)) { + if (rowset->tablet_schema()->has_inverted_index_with_index_id(index_id, "")) { return true; } } diff --git a/be/src/olap/tablet_schema.cpp b/be/src/olap/tablet_schema.cpp index af163275b3..85203b0b12 100644 --- a/be/src/olap/tablet_schema.cpp +++ b/be/src/olap/tablet_schema.cpp @@ -39,12 +39,88 @@ #include "tablet_meta.h" #include "vec/aggregate_functions/aggregate_function_simple_factory.h" #include "vec/aggregate_functions/aggregate_function_state_union.h" +#include "vec/common/hex.h" #include "vec/core/block.h" #include "vec/data_types/data_type.h" #include "vec/data_types/data_type_factory.hpp" namespace doris { +FieldType TabletColumn::get_field_type_by_type(PrimitiveType primitiveType) { + switch (primitiveType) { + case PrimitiveType::INVALID_TYPE: + return FieldType::OLAP_FIELD_TYPE_UNKNOWN; + case PrimitiveType::TYPE_NULL: + return FieldType::OLAP_FIELD_TYPE_NONE; + case PrimitiveType::TYPE_BOOLEAN: + return FieldType::OLAP_FIELD_TYPE_BOOL; + case PrimitiveType::TYPE_TINYINT: + return FieldType::OLAP_FIELD_TYPE_TINYINT; + case PrimitiveType::TYPE_SMALLINT: + return FieldType::OLAP_FIELD_TYPE_SMALLINT; + case PrimitiveType::TYPE_INT: + return FieldType::OLAP_FIELD_TYPE_INT; + case PrimitiveType::TYPE_BIGINT: + return FieldType::OLAP_FIELD_TYPE_BIGINT; + case PrimitiveType::TYPE_LARGEINT: + return FieldType::OLAP_FIELD_TYPE_LARGEINT; + case 
PrimitiveType::TYPE_FLOAT: + return FieldType::OLAP_FIELD_TYPE_FLOAT; + case PrimitiveType::TYPE_DOUBLE: + return FieldType::OLAP_FIELD_TYPE_DOUBLE; + case PrimitiveType::TYPE_VARCHAR: + return FieldType::OLAP_FIELD_TYPE_VARCHAR; + case PrimitiveType::TYPE_DATE: + return FieldType::OLAP_FIELD_TYPE_DATE; + case PrimitiveType::TYPE_DATETIME: + return FieldType::OLAP_FIELD_TYPE_DATETIME; + case PrimitiveType::TYPE_BINARY: + return FieldType::OLAP_FIELD_TYPE_UNKNOWN; // Not implemented + case PrimitiveType::TYPE_CHAR: + return FieldType::OLAP_FIELD_TYPE_CHAR; + case PrimitiveType::TYPE_STRUCT: + return FieldType::OLAP_FIELD_TYPE_STRUCT; + case PrimitiveType::TYPE_ARRAY: + return FieldType::OLAP_FIELD_TYPE_ARRAY; + case PrimitiveType::TYPE_MAP: + return FieldType::OLAP_FIELD_TYPE_MAP; + case PrimitiveType::TYPE_HLL: + return FieldType::OLAP_FIELD_TYPE_HLL; + case PrimitiveType::TYPE_DECIMALV2: + return FieldType::OLAP_FIELD_TYPE_UNKNOWN; // Not implemented + case PrimitiveType::TYPE_TIME: + return FieldType::OLAP_FIELD_TYPE_UNKNOWN; + case PrimitiveType::TYPE_OBJECT: + return FieldType::OLAP_FIELD_TYPE_OBJECT; + case PrimitiveType::TYPE_STRING: + return FieldType::OLAP_FIELD_TYPE_STRING; + case PrimitiveType::TYPE_QUANTILE_STATE: + return FieldType::OLAP_FIELD_TYPE_QUANTILE_STATE; + case PrimitiveType::TYPE_DATEV2: + return FieldType::OLAP_FIELD_TYPE_DATEV2; + case PrimitiveType::TYPE_DATETIMEV2: + return FieldType::OLAP_FIELD_TYPE_DATETIMEV2; + case PrimitiveType::TYPE_TIMEV2: + return FieldType::OLAP_FIELD_TYPE_TIMEV2; + case PrimitiveType::TYPE_DECIMAL32: + return FieldType::OLAP_FIELD_TYPE_DECIMAL32; + case PrimitiveType::TYPE_DECIMAL64: + return FieldType::OLAP_FIELD_TYPE_DECIMAL64; + case PrimitiveType::TYPE_DECIMAL128I: + return FieldType::OLAP_FIELD_TYPE_DECIMAL128I; + case PrimitiveType::TYPE_JSONB: + return FieldType::OLAP_FIELD_TYPE_JSONB; + case PrimitiveType::TYPE_VARIANT: + return FieldType::OLAP_FIELD_TYPE_VARIANT; + case 
PrimitiveType::TYPE_LAMBDA_FUNCTION: + return FieldType::OLAP_FIELD_TYPE_UNKNOWN; // Not implemented + case PrimitiveType::TYPE_AGG_STATE: + return FieldType::OLAP_FIELD_TYPE_AGG_STATE; + default: + return FieldType::OLAP_FIELD_TYPE_UNKNOWN; + } +} + FieldType TabletColumn::get_field_type_by_string(const std::string& type_str) { std::string upper_type_str = type_str; std::transform(type_str.begin(), type_str.end(), upper_type_str.begin(), @@ -568,6 +644,30 @@ vectorized::DataTypePtr TabletColumn::get_vec_type() const { return vectorized::DataTypeFactory::instance().create_data_type(*this); } +// escape '.' and '_' +std::string escape_for_path_name(const std::string& s) { + std::string res; + const char* pos = s.data(); + const char* end = pos + s.size(); + while (pos != end) { + unsigned char c = *pos; + if (c == '.' || c == '_') { + res += '%'; + res += vectorized::hex_digit_uppercase(c / 16); + res += vectorized::hex_digit_uppercase(c % 16); + } else { + res += c; + } + ++pos; + } + return res; +} + +void TabletIndex::set_escaped_escaped_index_suffix_path(const std::string& path_name) { + std::string escaped_path = escape_for_path_name(path_name); + _escaped_index_suffix_path = escaped_path; +} + void TabletIndex::init_from_thrift(const TOlapTableIndex& index, const TabletSchema& tablet_schema) { _index_id = index.index_id; @@ -644,6 +744,7 @@ void TabletIndex::init_from_pb(const TabletIndexPB& index) { for (auto& kv : index.properties()) { _properties[kv.first] = kv.second; } + _escaped_index_suffix_path = index.index_suffix_name(); } void TabletIndex::to_schema_pb(TabletIndexPB* index) const { @@ -657,6 +758,7 @@ void TabletIndex::to_schema_pb(TabletIndexPB* index) const { for (auto& kv : _properties) { (*index->mutable_properties())[kv.first] = kv.second; } + index->set_index_suffix_name(_escaped_index_suffix_path); } void TabletSchema::append_column(TabletColumn column, ColumnType col_type) { @@ -673,6 +775,7 @@ void TabletSchema::append_column(TabletColumn 
column, ColumnType col_type) { vectorized::PathInData path(col_name); column.set_path_info(path); } + _field_path_to_index[column.path_info()] = _num_columns; } if (UNLIKELY(column.name() == DELETE_SIGN)) { _delete_sign_idx = _num_columns; @@ -686,6 +789,7 @@ void TabletSchema::append_column(TabletColumn column, ColumnType col_type) { if (col_type == ColumnType::NORMAL) { _field_name_to_index[column.name()] = _num_columns; } else if (col_type == ColumnType::VARIANT) { + _field_name_to_index[column.name()] = _num_columns; _field_path_to_index[column.path_info()] = _num_columns; } _field_id_to_index[column.unique_id()] = _num_columns; @@ -697,6 +801,23 @@ void TabletSchema::append_index(TabletIndex index) { _indexes.push_back(std::move(index)); } +void TabletSchema::update_index(const TabletColumn& col, TabletIndex index) { + int32_t col_unique_id = col.unique_id(); + const std::string& suffix_path = + !col.path_info().empty() ? escape_for_path_name(col.path_info().get_path()) : ""; + for (size_t i = 0; i < _indexes.size(); i++) { + for (int32_t id : _indexes[i].col_unique_ids()) { + if (id == col_unique_id && _indexes[i].get_index_suffix() == suffix_path) { + _indexes[i] = index; + } + } + } +} + +void TabletSchema::clear_index() { + _indexes.clear(); +} + void TabletSchema::remove_index(int64_t index_id) { std::vector indexes; for (auto index : _indexes) { @@ -1000,13 +1121,16 @@ const TabletColumn& TabletSchema::column(const std::string& field_name) const { return _cols[found->second]; } -std::vector TabletSchema::get_indexes_for_column(int32_t col_unique_id) const { +std::vector TabletSchema::get_indexes_for_column( + const TabletColumn& col) const { std::vector indexes_for_column; - + int32_t col_unique_id = col.unique_id(); + const std::string& suffix_path = + !col.path_info().empty() ? 
escape_for_path_name(col.path_info().get_path()) : ""; // TODO use more efficient impl for (size_t i = 0; i < _indexes.size(); i++) { for (int32_t id : _indexes[i].col_unique_ids()) { - if (id == col_unique_id) { + if (id == col_unique_id && _indexes[i].get_index_suffix() == suffix_path) { indexes_for_column.push_back(&(_indexes[i])); } } @@ -1015,12 +1139,15 @@ std::vector TabletSchema::get_indexes_for_column(int32_t col return indexes_for_column; } -bool TabletSchema::has_inverted_index(int32_t col_unique_id) const { +bool TabletSchema::has_inverted_index(const TabletColumn& col) const { // TODO use more efficient impl + int32_t col_unique_id = col.unique_id(); + const std::string& suffix_path = + !col.path_info().empty() ? escape_for_path_name(col.path_info().get_path()) : ""; for (size_t i = 0; i < _indexes.size(); i++) { if (_indexes[i].index_type() == IndexType::INVERTED) { for (int32_t id : _indexes[i].col_unique_ids()) { - if (id == col_unique_id) { + if (id == col_unique_id && _indexes[i].get_index_suffix() == suffix_path) { return true; } } @@ -1030,9 +1157,11 @@ bool TabletSchema::has_inverted_index(int32_t col_unique_id) const { return false; } -bool TabletSchema::has_inverted_index_with_index_id(int32_t index_id) const { +bool TabletSchema::has_inverted_index_with_index_id(int32_t index_id, + const std::string& suffix_name) const { for (size_t i = 0; i < _indexes.size(); i++) { - if (_indexes[i].index_type() == IndexType::INVERTED && _indexes[i].index_id() == index_id) { + if (_indexes[i].index_type() == IndexType::INVERTED && + _indexes[i].get_index_suffix() == suffix_name && _indexes[i].index_id() == index_id) { return true; } } @@ -1040,21 +1169,29 @@ bool TabletSchema::has_inverted_index_with_index_id(int32_t index_id) const { return false; } -const TabletIndex* TabletSchema::get_inverted_index(int32_t col_unique_id) const { - // TODO use more efficient impl +const TabletIndex* TabletSchema::get_inverted_index(int32_t col_unique_id, + const 
std::string& suffix_path) const { for (size_t i = 0; i < _indexes.size(); i++) { if (_indexes[i].index_type() == IndexType::INVERTED) { for (int32_t id : _indexes[i].col_unique_ids()) { - if (id == col_unique_id) { + if (id == col_unique_id && + _indexes[i].get_index_suffix() == escape_for_path_name(suffix_path)) { return &(_indexes[i]); } } } } - return nullptr; } +const TabletIndex* TabletSchema::get_inverted_index(const TabletColumn& col) const { + // TODO use more efficient impl + int32_t col_unique_id = col.unique_id(); + const std::string& suffix_path = + !col.path_info().empty() ? escape_for_path_name(col.path_info().get_path()) : ""; + return get_inverted_index(col_unique_id, suffix_path); +} + bool TabletSchema::has_ngram_bf_index(int32_t col_unique_id) const { // TODO use more efficient impl for (size_t i = 0; i < _indexes.size(); i++) { diff --git a/be/src/olap/tablet_schema.h b/be/src/olap/tablet_schema.h index 794c9b5b5b..059e38154f 100644 --- a/be/src/olap/tablet_schema.h +++ b/be/src/olap/tablet_schema.h @@ -35,8 +35,10 @@ #include "common/status.h" #include "gutil/stringprintf.h" #include "olap/olap_common.h" +#include "runtime/define_primitive_type.h" #include "util/string_util.h" #include "vec/aggregate_functions/aggregate_function.h" +#include "vec/common/string_utils/string_utils.h" #include "vec/json/path_in_data.h" namespace doris { @@ -79,6 +81,7 @@ public: bool is_bf_column() const { return _is_bf_column; } bool has_bitmap_index() const { return _has_bitmap_index; } bool is_array_type() const { return _type == FieldType::OLAP_FIELD_TYPE_ARRAY; } + bool is_jsonb_type() const { return _type == FieldType::OLAP_FIELD_TYPE_JSONB; } bool is_length_variable_type() const { return _type == FieldType::OLAP_FIELD_TYPE_CHAR || _type == FieldType::OLAP_FIELD_TYPE_VARCHAR || @@ -127,6 +130,7 @@ public: static std::string get_string_by_field_type(FieldType type); static std::string get_string_by_aggregation_type(FieldAggregationMethod aggregation_type); 
static FieldType get_field_type_by_string(const std::string& str); + static FieldType get_field_type_by_type(PrimitiveType type); static FieldAggregationMethod get_aggregation_type_by_string(const std::string& str); static uint32_t get_field_length_by_type(TPrimitiveType::type type, uint32_t string_length); bool is_row_store_column() const; @@ -207,16 +211,15 @@ public: return 0; } - TabletIndex(const TabletIndex& other) { - _index_id = other._index_id; - _index_name = other._index_name; - _index_type = other._index_type; - _col_unique_ids = other._col_unique_ids; - _properties = other._properties; - } + + const std::string& get_index_suffix() const { return _escaped_index_suffix_path; } + + void set_escaped_escaped_index_suffix_path(const std::string& name); private: int64_t _index_id; + // Identify the different index with the same _index_id + std::string _escaped_index_suffix_path; std::string _index_name; IndexType _index_type; std::vector _col_unique_ids; @@ -234,7 +237,9 @@ public: void to_schema_pb(TabletSchemaPB* tablet_meta_pb) const; void append_column(TabletColumn column, ColumnType col_type = ColumnType::NORMAL); void append_index(TabletIndex index); + void update_index(const TabletColumn& column, TabletIndex index); void remove_index(int64_t index_id); + void clear_index(); // Must make sure the row column is always the last column void add_row_column(); void copy_from(const TabletSchema& tablet_schema); @@ -287,10 +292,12 @@ public: segment_v2::CompressionTypePB compression_type() const { return _compression_type; } const std::vector& indexes() const { return _indexes; } - std::vector get_indexes_for_column(int32_t col_unique_id) const; - bool has_inverted_index(int32_t col_unique_id) const; - bool has_inverted_index_with_index_id(int32_t index_id) const; - const TabletIndex* get_inverted_index(int32_t col_unique_id) const; + std::vector get_indexes_for_column(const TabletColumn& col) const; + bool has_inverted_index(const TabletColumn& col) const; + 
bool has_inverted_index_with_index_id(int32_t index_id, const std::string& suffix_path) const; + const TabletIndex* get_inverted_index(const TabletColumn& col) const; + const TabletIndex* get_inverted_index(int32_t col_unique_id, + const std::string& suffix_path) const; bool has_ngram_bf_index(int32_t col_unique_id) const; const TabletIndex* get_ngram_bf_index(int32_t col_unique_id) const; void update_indexes_from_thrift(const std::vector& indexes); diff --git a/be/src/olap/task/index_builder.cpp b/be/src/olap/task/index_builder.cpp index 6969edfe05..fbb8496297 100644 --- a/be/src/olap/task/index_builder.cpp +++ b/be/src/olap/task/index_builder.cpp @@ -79,8 +79,8 @@ Status IndexBuilder::update_inverted_index_info() { TabletIndex index; index.init_from_thrift(t_inverted_index, *input_rs_tablet_schema); auto column_uid = index.col_unique_ids()[0]; - const TabletIndex* exist_index = - output_rs_tablet_schema->get_inverted_index(column_uid); + const TabletColumn& col = output_rs_tablet_schema->column_by_uid(column_uid); + const TabletIndex* exist_index = output_rs_tablet_schema->get_inverted_index(col); if (exist_index && exist_index->index_id() != index.index_id()) { LOG(WARNING) << fmt::format( "column: {} has a exist inverted index, but the index id not equal " @@ -173,11 +173,11 @@ Status IndexBuilder::handle_single_rowset(RowsetMetaSharedPtr output_rowset_meta continue; } auto column = output_rowset_schema->column(column_idx); - DCHECK(output_rowset_schema->has_inverted_index_with_index_id(index_id)); + DCHECK(output_rowset_schema->has_inverted_index_with_index_id(index_id, "")); _olap_data_convertor->add_column_data_convertor(column); return_columns.emplace_back(column_idx); std::unique_ptr field(FieldFactory::create(column)); - auto index_meta = output_rowset_schema->get_inverted_index(column.unique_id()); + auto index_meta = output_rowset_schema->get_inverted_index(column); std::unique_ptr inverted_index_builder; try { 
RETURN_IF_ERROR(segment_v2::InvertedIndexColumnWriter::create( diff --git a/be/src/pipeline/exec/scan_operator.cpp b/be/src/pipeline/exec/scan_operator.cpp index 93c1eb9f98..bb5a76e19b 100644 --- a/be/src/pipeline/exec/scan_operator.cpp +++ b/be/src/pipeline/exec/scan_operator.cpp @@ -32,6 +32,7 @@ #include "vec/exec/scan/pip_scanner_context.h" #include "vec/exec/scan/scanner_context.h" #include "vec/exec/scan/vscan_node.h" +#include "vec/exprs/vcast_expr.h" #include "vec/exprs/vcompound_pred.h" #include "vec/exprs/vectorized_fn_call.h" #include "vec/exprs/vexpr.h" @@ -186,24 +187,14 @@ Status ScanLocalState::_normalize_conjuncts() { // The conjuncts is always on output tuple, so use _output_tuple_desc; std::vector slots = p._output_tuple_desc->slots(); - for (int slot_idx = 0; slot_idx < slots.size(); ++slot_idx) { - _colname_to_slot_id[slots[slot_idx]->col_name()] = slots[slot_idx]->id(); - - auto type = slots[slot_idx]->type().type; - if (slots[slot_idx]->type().type == TYPE_ARRAY) { - type = slots[slot_idx]->type().children[0].type; - if (type == TYPE_ARRAY) { - continue; - } - } + auto init_value_range = [&](SlotDescriptor* slot, PrimitiveType type) { switch (type) { -#define M(NAME) \ - case TYPE_##NAME: { \ - ColumnValueRange range( \ - slots[slot_idx]->col_name(), slots[slot_idx]->is_nullable(), \ - slots[slot_idx]->type().precision, slots[slot_idx]->type().scale); \ - _slot_id_to_value_range[slots[slot_idx]->id()] = std::pair {slots[slot_idx], range}; \ - break; \ +#define M(NAME) \ + case TYPE_##NAME: { \ + ColumnValueRange range(slot->col_name(), slot->is_nullable(), \ + slot->type().precision, slot->type().scale); \ + _slot_id_to_value_range[slot->id()] = std::pair {slot, range}; \ + break; \ } #define APPLY_FOR_PRIMITIVE_TYPE(M) \ M(TINYINT) \ @@ -228,11 +219,29 @@ Status ScanLocalState::_normalize_conjuncts() { APPLY_FOR_PRIMITIVE_TYPE(M) #undef M default: { - VLOG_CRITICAL << "Unsupported Normalize Slot [ColName=" << slots[slot_idx]->col_name() - << 
"]"; + VLOG_CRITICAL << "Unsupported Normalize Slot [ColName=" << slot->col_name() << "]"; break; } } + }; + + for (int slot_idx = 0; slot_idx < slots.size(); ++slot_idx) { + _colname_to_slot_id[slots[slot_idx]->col_name()] = slots[slot_idx]->id(); + _slot_id_to_slot_desc[slots[slot_idx]->id()] = slots[slot_idx]; + + auto type = slots[slot_idx]->type().type; + if (slots[slot_idx]->type().type == TYPE_ARRAY) { + type = slots[slot_idx]->type().children[0].type; + if (type == TYPE_ARRAY) { + continue; + } + } + init_value_range(slots[slot_idx], slots[slot_idx]->type().type); + } + + get_cast_types_for_variants(); + for (const auto& [colname, type] : _cast_types_for_variants) { + init_value_range(_slot_id_to_slot_desc[_colname_to_slot_id[colname]], type); } for (auto it = _conjuncts.begin(); it != _conjuncts.end();) { @@ -323,6 +332,16 @@ Status ScanLocalState::_normalize_predicate( output_expr = nullptr; return Status::OK(); } + std::shared_ptr slotref; + for (const auto& child : cur_expr->children()) { + if (vectorized::VExpr::expr_without_cast(child)->node_type() != + TExprNodeType::SLOT_REF) { + // not a slot ref(column) + continue; + } + slotref = std::dynamic_pointer_cast( + vectorized::VExpr::expr_without_cast(child)); + } if (_is_predicate_acting_on_slot(cur_expr, in_predicate_checker, &slot, &range) || _is_predicate_acting_on_slot(cur_expr, eq_predicate_checker, &slot, &range)) { Status status = Status::OK(); @@ -382,6 +401,14 @@ Status ScanLocalState::_normalize_predicate( return Status::OK(); } + if (pdt == vectorized::VScanNode::PushDownType::ACCEPTABLE && slotref != nullptr && + slotref->type().is_variant_type()) { + // remaining it in the expr tree, in order to filter by function if the pushdown + // predicate is not applied + output_expr = conjunct_expr_root; // remaining in conjunct tree + return Status::OK(); + } + if (pdt == vectorized::VScanNode::PushDownType::ACCEPTABLE && (_is_key_column(slot->col_name()) || _storage_no_merge())) { output_expr = 
nullptr; @@ -536,6 +563,10 @@ bool ScanLocalState::_ignore_cast(SlotDescriptor* slot, vectorized::VEx if (slot->type().is_string_type() && expr->type().is_string_type()) { return true; } + // Variant slot cast could be eliminated + if (slot->type().is_variant_type()) { + return true; + } if (slot->type().is_array_type()) { if (slot->type().children[0].type == expr->type().type) { return true; @@ -1266,6 +1297,52 @@ Status ScanLocalState::_init_profile() { return Status::OK(); } +template +void ScanLocalState::_filter_and_collect_cast_type_for_variant( + const vectorized::VExpr* expr, + phmap::flat_hash_map>& colname_to_cast_types) { + const auto* cast_expr = dynamic_cast(expr); + if (cast_expr != nullptr) { + const auto* src_slot = + cast_expr->get_child(0)->node_type() == TExprNodeType::SLOT_REF + ? dynamic_cast(cast_expr->get_child(0).get()) + : nullptr; + if (src_slot == nullptr) { + return; + } + std::vector slots = output_tuple_desc()->slots(); + SlotDescriptor* src_slot_desc = _slot_id_to_slot_desc[src_slot->slot_id()]; + PrimitiveType cast_dst_type = + cast_expr->get_target_type()->get_type_as_type_descriptor().type; + if (src_slot_desc->type().is_variant_type()) { + colname_to_cast_types[src_slot_desc->col_name()].push_back(cast_dst_type); + } + } + for (const auto& child : expr->children()) { + _filter_and_collect_cast_type_for_variant(child.get(), colname_to_cast_types); + } +} + +template +void ScanLocalState::get_cast_types_for_variants() { + phmap::flat_hash_map> colname_to_cast_types; + for (auto it = _conjuncts.begin(); it != _conjuncts.end();) { + auto& conjunct = *it; + if (conjunct->root()) { + _filter_and_collect_cast_type_for_variant(conjunct->root().get(), + colname_to_cast_types); + } + ++it; + } + // cast to one certain type for variant could utilize fully predicates performance + // when storage layer type equals to cast type + for (const auto& [slotid, types] : colname_to_cast_types) { + if (types.size() == 1) { + 
_cast_types_for_variants[slotid] = types[0]; + } + } +} + template ScanOperatorX::ScanOperatorX(ObjectPool* pool, const TPlanNode& tnode, int operator_id, const DescriptorTbl& descs) diff --git a/be/src/pipeline/exec/scan_operator.h b/be/src/pipeline/exec/scan_operator.h index 42185fc80c..06fe2452c7 100644 --- a/be/src/pipeline/exec/scan_operator.h +++ b/be/src/pipeline/exec/scan_operator.h @@ -25,6 +25,7 @@ #include "common/status.h" #include "operator.h" #include "pipeline/pipeline_x/operator.h" +#include "runtime/descriptors.h" #include "vec/exec/scan/vscan_node.h" namespace doris { @@ -359,6 +360,14 @@ protected: // Submit the scanner to the thread pool and start execution Status _start_scanners(const std::list& scanners); + // For some conjunct there is chance to elimate cast operator + // Eg. Variant's sub column could eliminate cast in storage layer if + // cast dst column type equals storage column type + void get_cast_types_for_variants(); + void _filter_and_collect_cast_type_for_variant( + const vectorized::VExpr* expr, + phmap::flat_hash_map>& colname_to_cast_types); + // Every time vconjunct_ctx_ptr is updated, the old ctx will be stored in this vector // so that it will be destroyed uniformly at the end of the query. vectorized::VExprContextSPtrs _stale_expr_ctxs; @@ -371,6 +380,12 @@ protected: // Save all function predicates which may be pushed down to data source. 
std::vector _push_down_functions; + // colname -> cast dst type + std::map _cast_types_for_variants; + + // slot id -> SlotDescriptor + phmap::flat_hash_map _slot_id_to_slot_desc; + // slot id -> ColumnValueRange // Parsed from conjuncts phmap::flat_hash_map> diff --git a/be/src/service/internal_service.cpp b/be/src/service/internal_service.cpp index 5f1868f436..35de12af1a 100644 --- a/be/src/service/internal_service.cpp +++ b/be/src/service/internal_service.cpp @@ -1422,14 +1422,16 @@ void PInternalServiceImpl::request_slave_tablet_pull_rowset( for (auto index_size : segment_indices_size.index_sizes()) { auto index_id = index_size.indexid(); auto size = index_size.size(); + auto suffix_path = index_size.suffix_path(); std::string remote_inverted_index_file = - InvertedIndexDescriptor::get_index_file_name(remote_file_path, - index_id); + InvertedIndexDescriptor::get_index_file_name(remote_file_path, index_id, + suffix_path); std::string remote_inverted_index_file_url = construct_url( get_host_port(host, http_port), token, remote_inverted_index_file); std::string local_inverted_index_file = - InvertedIndexDescriptor::get_index_file_name(local_file_path, index_id); + InvertedIndexDescriptor::get_index_file_name(local_file_path, index_id, + suffix_path); st = download_file_action(remote_inverted_index_file_url, local_inverted_index_file, estimate_timeout, size); if (!st.ok()) { @@ -1690,8 +1692,10 @@ Status PInternalServiceImpl::_multi_get(const PMultiGetRequest& request, std::unique_ptr column_iterator; vectorized::MutableColumnPtr column = result_block.get_by_position(x).column->assume_mutable(); - RETURN_IF_ERROR( - segment->new_column_iterator(full_read_schema.column(index), &column_iterator)); + StorageReadOptions storage_read_opt; + storage_read_opt.io_ctx.reader_type = ReaderType::READER_QUERY; + RETURN_IF_ERROR(segment->new_column_iterator(full_read_schema.column(index), + &column_iterator, &storage_read_opt)); segment_v2::ColumnIteratorOptions opt { 
.use_page_cache = !config::disable_storage_page_cache, .file_reader = segment->file_reader().get(), diff --git a/be/src/vec/columns/column_object.cpp b/be/src/vec/columns/column_object.cpp index fb793abe75..ed251ef4ac 100644 --- a/be/src/vec/columns/column_object.cpp +++ b/be/src/vec/columns/column_object.cpp @@ -22,6 +22,7 @@ #include #include +#include #include #include #include @@ -51,7 +52,6 @@ #include "vec/common/field_visitors.h" #include "vec/common/schema_util.h" #include "vec/common/string_buffer.hpp" -#include "vec/common/typeid_cast.h" #include "vec/core/column_with_type_and_name.h" #include "vec/core/field.h" #include "vec/core/types.h" @@ -436,8 +436,7 @@ ColumnPtr ColumnObject::index(const IColumn& indexes, size_t limit) const { } bool ColumnObject::Subcolumn::check_if_sparse_column(size_t num_rows) { - constexpr static size_t s_threshold_rows_estimate_sparse_column = 1000; - if (num_rows < s_threshold_rows_estimate_sparse_column) { + if (num_rows < config::variant_threshold_rows_to_estimate_sparse_column) { return false; } std::vector defaults_ratio; @@ -446,7 +445,7 @@ bool ColumnObject::Subcolumn::check_if_sparse_column(size_t num_rows) { } double default_ratio = std::accumulate(defaults_ratio.begin(), defaults_ratio.end(), 0.0) / defaults_ratio.size(); - return default_ratio >= config::ratio_of_defaults_as_sparse_column; + return default_ratio >= config::variant_ratio_of_defaults_as_sparse_column; } void ColumnObject::Subcolumn::finalize() { @@ -756,6 +755,9 @@ FieldInfo ColumnObject::Subcolumn::get_subcolumn_field_info() const { } void ColumnObject::insert_range_from(const IColumn& src, size_t start, size_t length) { +#ifndef NDEBUG + check_consistency(); +#endif const auto& src_object = assert_cast(src); for (const auto& entry : src_object.subcolumns) { if (!has_subcolumn(entry->path)) { @@ -827,9 +829,13 @@ bool ColumnObject::add_sub_column(const PathInData& key, MutableColumnPtr&& subc } if (key.empty() && 
((!subcolumns.get_root()->is_scalar()) || is_nothing(subcolumns.get_root()->data.get_least_common_type()))) { - // update root + bool root_it_scalar = subcolumns.get_root()->is_scalar(); + // update root to scalar subcolumns.get_mutable_root()->modify_to_scalar( Subcolumn(std::move(subcolumn), type, is_nullable, true)); + if (!root_it_scalar) { + subcolumns.add_leaf(subcolumns.get_root_ptr()); + } if (num_rows == 0) { num_rows = new_size; } @@ -859,6 +865,7 @@ bool ColumnObject::add_sub_column(const PathInData& key, size_t new_size) { if (key.empty() && (!subcolumns.get_root()->is_scalar())) { // update none scalar root column to scalar node subcolumns.get_mutable_root()->modify_to_scalar(Subcolumn(new_size, is_nullable, true)); + subcolumns.add_leaf(subcolumns.get_root_ptr()); if (num_rows == 0) { num_rows = new_size; } @@ -903,11 +910,6 @@ bool ColumnObject::is_finalized() const { [](const auto& entry) { return entry->data.is_finalized(); }); } -static bool check_if_valid_column_name(const PathInData& path) { - static const std::regex COLUMN_NAME_REGEX("^[_a-zA-Z@0-9][.a-zA-Z0-9_+-/>data.get_finalized_column_ptr() = mresult->get_ptr(); } +void ColumnObject::finalize_if_not() { + if (!is_finalized()) { + finalize(); + } +} + void ColumnObject::finalize(bool ignore_sparse) { Subcolumns new_subcolumns; // finalize root first @@ -1185,8 +1193,7 @@ void ColumnObject::finalize(bool ignore_sparse) { } // Check and spilit sparse subcolumns - if (!ignore_sparse && (entry->data.check_if_sparse_column(num_rows) || - !check_if_valid_column_name(entry->path))) { + if (!ignore_sparse && (entry->data.check_if_sparse_column(num_rows))) { // TODO seperate ambiguous path sparse_columns.add(entry->path, entry->data); continue; @@ -1239,6 +1246,18 @@ void ColumnObject::strip_outer_array() { std::swap(subcolumns, new_subcolumns); } +void ColumnObject::replicate(const uint32_t* indexs, size_t target_size, IColumn& column) const { + if (!is_finalized()) { + 
const_cast(this)->finalize(); + } + auto& var = assert_cast(column); + for (auto& entry : subcolumns) { + auto replica = entry->data.get_finalized_column().clone_empty(); + entry->data.get_finalized_column().replicate(indexs, target_size, *replica); + var.add_sub_column(entry->path, std::move(replica), entry->data.get_least_common_type()); + } +} + ColumnPtr ColumnObject::filter(const Filter& filter, ssize_t count) const { if (!is_finalized()) { const_cast(this)->finalize(); @@ -1299,16 +1318,23 @@ size_t ColumnObject::filter(const Filter& filter) { return count; } -void ColumnObject::clear() { +void ColumnObject::clear_subcolumns_data() { for (auto& entry : subcolumns) { for (auto& part : entry->data.data) { - part->clear(); + DCHECK_EQ(part->use_count(), 1); + (*std::move(part)).clear(); } entry->data.num_of_defaults_in_prefix = 0; } num_rows = 0; } +void ColumnObject::clear() { + Subcolumns empty; + std::swap(empty, subcolumns); + num_rows = 0; +} + void ColumnObject::revise_to(int target_num_rows) { for (auto&& entry : subcolumns) { if (entry->data.size() > target_num_rows) { @@ -1345,7 +1371,8 @@ bool ColumnObject::is_null_root() const { bool ColumnObject::is_scalar_variant() const { // Only root itself - return !is_null_root() && subcolumns.get_leaves().size() == 1; + return !is_null_root() && subcolumns.get_leaves().size() == 1 && + subcolumns.get_root()->is_scalar(); } DataTypePtr ColumnObject::get_root_type() const { @@ -1390,78 +1417,20 @@ Status ColumnObject::extract_root(const PathInData& path, MutableColumnPtr& dst) return Status::OK(); } -template -void align_variant_by_name_and_type(ColumnObject& dst, const ColumnObject& src, size_t row_cnt, - ColumnInserterFn inserter) { - CHECK(dst.is_finalized()); - if (!src.is_finalized()) { - const_cast(src).finalize(); - } - // Use rows() here instead of size(), since size() will check_consistency - // but we could not check_consistency since num_rows will be upgraded even - // if src and dst is empty, we just 
increase the num_rows of dst and fill - // num_rows of default values when meet new data - size_t num_rows = dst.rows(); - for (auto& entry : dst.get_subcolumns()) { - const auto* src_subcol = src.get_subcolumn(entry->path); - if (src_subcol == nullptr) { - entry->data.get_finalized_column().insert_many_defaults(row_cnt); - } else { - // It's the first time alignment, so that we should build it - if (entry->data.get_least_common_type()->get_type_id() == TypeIndex::Nothing) { - entry->data.add_new_column_part(src_subcol->get_least_common_type()); - } - // TODO handle type confict here, like ColumnObject before - CHECK(entry->data.get_least_common_type()->equals( - *src_subcol->get_least_common_type())); - const auto& src_column = src_subcol->get_finalized_column(); - inserter(src_column, &entry->data.get_finalized_column()); - } - dst.set_num_rows(entry->data.get_finalized_column().size()); - } - for (const auto& entry : src.get_subcolumns()) { - // encounter a new column - const auto* dst_subcol = dst.get_subcolumn(entry->path); - if (dst_subcol == nullptr) { - auto type = entry->data.get_least_common_type(); - auto new_column = type->create_column(); - new_column->insert_many_defaults(num_rows); - inserter(entry->data.get_finalized_column(), new_column.get()); - dst.set_num_rows(new_column->size()); - dst.add_sub_column(entry->path, std::move(new_column)); - } - } - num_rows += row_cnt; - if (dst.empty()) { - dst.incr_num_rows(row_cnt); - } -#ifndef NDEBUG - // Check all columns rows matched - for (const auto& entry : dst.get_subcolumns()) { - DCHECK_EQ(entry->data.get_finalized_column().size(), num_rows); - } -#endif -} - void ColumnObject::append_data_by_selector(MutableColumnPtr& res, const IColumn::Selector& selector) const { - // append by selector with alignment - ColumnObject& dst_column = *assert_cast(res.get()); - align_variant_by_name_and_type(dst_column, *this, selector.size(), - [&selector](const IColumn& src, IColumn* dst) { - auto mutable_dst = 
dst->assume_mutable(); - src.append_data_by_selector(mutable_dst, selector); - }); + return append_data_by_selector_impl(res, selector); } void ColumnObject::insert_indices_from(const IColumn& src, const int* indices_begin, const int* indices_end) { - // insert_indices_from with alignment - const ColumnObject& src_column = *check_and_get_column(src); - align_variant_by_name_and_type(*this, src_column, indices_end - indices_begin, - [indices_begin, indices_end](const IColumn& src, IColumn* dst) { - dst->insert_indices_from(src, indices_begin, indices_end); - }); + for (auto x = indices_begin; x != indices_end; ++x) { + if (*x == -1) { + ColumnObject::insert_default(); + } else { + ColumnObject::insert_from(src, *x); + } + } } void ColumnObject::insert_indices_from_join(const IColumn& src, const uint32_t* indices_begin, diff --git a/be/src/vec/columns/column_object.h b/be/src/vec/columns/column_object.h index c279042251..30c524555f 100644 --- a/be/src/vec/columns/column_object.h +++ b/be/src/vec/columns/column_object.h @@ -337,8 +337,19 @@ public: return finalized; } + void finalize_if_not(); + void clear() override; + void clear_subcolumns_data(); + + std::string get_name() const override { + if (is_scalar_variant()) { + return "var_scalar(" + get_root()->get_name() + ")"; + } + return "variant"; + } + /// Part of interface const char* get_family_name() const override { return "Variant"; } @@ -447,9 +458,7 @@ public: LOG(FATAL) << "should not call the method in column object"; } - void replicate(const uint32_t* indexs, size_t target_size, IColumn& column) const override { - LOG(FATAL) << "not support"; - } + void replicate(const uint32_t* indexs, size_t target_size, IColumn& column) const override; template MutableColumnPtr apply_for_subcolumns(Func&& func) const; diff --git a/be/src/vec/columns/subcolumn_tree.h b/be/src/vec/columns/subcolumn_tree.h index a9a37cdf41..f895b94ded 100644 --- a/be/src/vec/columns/subcolumn_tree.h +++ 
b/be/src/vec/columns/subcolumn_tree.h @@ -116,6 +116,8 @@ public: leaves.push_back(root); } + void add_leaf(const NodePtr& node) { leaves.push_back(node); } + bool add(const PathInData& path, const NodeCreator& node_creator) { const auto& parts = path.get_parts(); @@ -218,6 +220,7 @@ public: const Nodes& get_leaves() const { return leaves; } const Node* get_root() const { return root.get(); } + const NodePtr& get_root_ptr() const { return root; } Node* get_mutable_root() { return root.get(); } static void get_leaves_of_node(const Node* node, std::vector& nodes, diff --git a/be/src/vec/common/schema_util.cpp b/be/src/vec/common/schema_util.cpp index a42181e967..2c678f7051 100644 --- a/be/src/vec/common/schema_util.cpp +++ b/be/src/vec/common/schema_util.cpp @@ -31,8 +31,10 @@ #include #include +#include #include #include +#include #include #include #include @@ -163,15 +165,26 @@ Status cast_column(const ColumnWithTypeAndName& arg, const DataTypePtr& type, Co // Variant column is a really special case, src type is nullable but dst variant type is none nullable, // but we still need to wrap nullmap into variant root column to prevent from nullable info lost. // TODO rethink and better handle this sepecial situation - if (arg.type->is_nullable() && WhichDataType(type).is_variant_type()) { + if (arg.type->is_nullable() && WhichDataType(remove_nullable(type)).is_variant_type()) { auto variant = ColumnObject::create(true); - auto& old_variant = assert_cast(*(*result)->assume_mutable()); + auto& old_variant = + (*result)->is_nullable() + ? 
assert_cast( + assert_cast(**result).get_nested_column()) + : assert_cast(*(*result)->assume_mutable()); DCHECK(!old_variant.get_root()->is_nullable()); auto nullable = ColumnNullable::create( old_variant.get_root(), assert_cast(*arg.column).get_null_map_column_ptr()); variant->create_root(make_nullable(arg.type), nullable->assume_mutable()); - *result = std::move(variant); + if ((*result)->is_nullable()) { + *result = ColumnNullable::create(std::move(variant), + assert_cast(*arg.column) + .get_null_map_column_ptr() + ->clone_resized(nullable->size())); + } else { + *result = std::move(variant); + } } return Status::OK(); } @@ -213,9 +226,17 @@ void get_column_by_type(const vectorized::DataTypePtr& data_type, const std::str return; } // TODO handle more types like struct/date/datetime/decimal... + LOG(FATAL) << "__builtin_unreachable"; __builtin_unreachable(); } +TabletColumn get_column_by_type(const vectorized::DataTypePtr& data_type, const std::string& name, + const ExtraInfo& ext_info) { + TabletColumn result; + get_column_by_type(data_type, name, result, ext_info); + return result; +} + TabletColumn get_least_type_column(const TabletColumn& original, const DataTypePtr& new_type, const ExtraInfo& ext_info, bool* changed) { TabletColumn result_column; @@ -289,45 +310,115 @@ void update_least_common_schema(const std::vector& schemas, TabletColumn common_column; // const std::string& column_name = variant_col_name + "." 
+ tuple_paths[i].get_path(); get_column_by_type(tuple_types[i], tuple_paths[i].get_path(), common_column, - ExtraInfo {.unique_id = -1, + ExtraInfo {.unique_id = variant_col_unique_id, .parent_unique_id = variant_col_unique_id, .path_info = tuple_paths[i]}); common_schema->append_column(common_column); } } -void get_least_common_schema(const std::vector& schemas, - TabletSchemaSPtr& common_schema) { - // Pick tablet schema with max schema version - const TabletSchemaSPtr base_schema = - *std::max_element(schemas.cbegin(), schemas.cend(), - [](const TabletSchemaSPtr a, const TabletSchemaSPtr b) { - return a->schema_version() < b->schema_version(); - }); - CHECK(base_schema); - CHECK(common_schema); - common_schema->copy_from(*base_schema); - // Merge columns from other schemas - common_schema->clear_columns(); - std::vector variant_column_unique_id; - // Get all columns without extracted columns and collect variant col unique id - for (const TabletColumn& col : base_schema->columns()) { - if (col.is_variant_type()) { - variant_column_unique_id.push_back(col.unique_id()); +void inherit_tablet_index(TabletSchemaSPtr& schema) { + std::unordered_map variants_index_meta; + // Get all variants tablet index metas if exist + for (const auto& col : schema->columns()) { + auto index_meta = schema->get_inverted_index(col.unique_id(), ""); + if (col.is_variant_type() && index_meta != nullptr) { + variants_index_meta.emplace(col.unique_id(), *index_meta); } + } + + // Add index meta if extracted column is missing index meta + for (const auto& col : schema->columns()) { if (!col.is_extracted_column()) { - common_schema->append_column(col); + continue; + } + auto it = variants_index_meta.find(col.parent_unique_id()); + // variant has no index meta, ignore + if (it == variants_index_meta.end()) { + continue; + } + auto index_meta = schema->get_inverted_index(col); + // add index meta + TabletIndex index_info = it->second; + 
index_info.set_escaped_escaped_index_suffix_path(col.path_info().get_path()); + if (index_meta != nullptr) { + // already exist + schema->update_index(col, index_info); + } else { + schema->append_index(index_info); } } - for (int32_t unique_id : variant_column_unique_id) { - update_least_common_schema(schemas, common_schema, unique_id); +} + +TabletSchemaSPtr get_least_common_schema(const std::vector& schemas, + const TabletSchemaSPtr& base_schema) { + auto output_schema = std::make_shared(); + std::vector variant_column_unique_id; + if (base_schema == nullptr) { + // Pick tablet schema with max schema version + auto max_version_schema = + *std::max_element(schemas.cbegin(), schemas.cend(), + [](const TabletSchemaSPtr a, const TabletSchemaSPtr b) { + return a->schema_version() < b->schema_version(); + }); + CHECK(max_version_schema); + output_schema->copy_from(*max_version_schema); + // Merge columns from other schemas + output_schema->clear_columns(); + // Get all columns without extracted columns and collect variant col unique id + for (const TabletColumn& col : max_version_schema->columns()) { + if (col.is_variant_type()) { + variant_column_unique_id.push_back(col.unique_id()); + } + if (!col.is_extracted_column()) { + output_schema->append_column(col); + } + } + } else { + // use input common schema as base schema + // Get all columns without extracted columns and collect variant col unique id + for (const TabletColumn& col : base_schema->columns()) { + if (col.is_variant_type()) { + variant_column_unique_id.push_back(col.unique_id()); + } + } + output_schema->copy_from(*base_schema); } + + for (int32_t unique_id : variant_column_unique_id) { + update_least_common_schema(schemas, output_schema, unique_id); + } + + inherit_tablet_index(output_schema); + return output_schema; +} + +Status parse_and_encode_variant_columns(Block& block, const std::vector& variant_pos) { + try { + // Parse each variant column from raw string column + 
vectorized::schema_util::parse_variant_columns(block, variant_pos); + vectorized::schema_util::finalize_variant_columns(block, variant_pos, + false /*not ingore sparse*/); + vectorized::schema_util::encode_variant_sparse_subcolumns(block, variant_pos); + } catch (const doris::Exception& e) { + // TODO more graceful, max_filter_ratio + LOG(WARNING) << "encounter execption " << e.to_string(); + return Status::InternalError(e.to_string()); + } + return Status::OK(); } void parse_variant_columns(Block& block, const std::vector& variant_pos) { for (int i = 0; i < variant_pos.size(); ++i) { - auto& column = block.get_by_position(variant_pos[i]).column; - const auto& root = *assert_cast(*column.get()).get_root(); + const auto& column_ref = block.get_by_position(variant_pos[i]).column; + bool is_nullable = column_ref->is_nullable(); + const auto& column = remove_nullable(column_ref); + const auto& var = assert_cast(*column.get()); + if (!var.is_scalar_variant()) { + // already parsed + continue; + } + const auto& root = *var.get_root(); const auto& raw_json_column = root.is_nullable() ? 
static_cast( @@ -335,24 +426,39 @@ void parse_variant_columns(Block& block, const std::vector& variant_pos) { : static_cast(root); MutableColumnPtr variant_column = ColumnObject::create(true); parse_json_to_variant(*variant_column.get(), raw_json_column); - block.get_by_position(variant_pos[i]).column = variant_column->get_ptr(); - block.get_by_position(variant_pos[i]).type = std::make_shared("json", true); + // Wrap variant with nullmap if it is nullable + ColumnPtr result = variant_column->get_ptr(); + if (is_nullable) { + const auto& null_map = + assert_cast(*column_ref).get_null_map_column_ptr(); + result = ColumnNullable::create(result, null_map); + } + block.get_by_position(variant_pos[i]).column = result; + // block.get_by_position(variant_pos[i]).type = std::make_shared("json", true); } } void finalize_variant_columns(Block& block, const std::vector& variant_pos, bool ignore_sparse) { for (int i = 0; i < variant_pos.size(); ++i) { - auto& column = assert_cast( - block.get_by_position(variant_pos[i]).column->assume_mutable_ref()); + auto& column_ref = block.get_by_position(variant_pos[i]).column->assume_mutable_ref(); + auto& column = + column_ref.is_nullable() + ? assert_cast( + assert_cast(column_ref).get_nested_column()) + : assert_cast(column_ref); column.finalize(ignore_sparse); } } void encode_variant_sparse_subcolumns(Block& block, const std::vector& variant_pos) { for (int i = 0; i < variant_pos.size(); ++i) { - auto& column = assert_cast( - block.get_by_position(variant_pos[i]).column->assume_mutable_ref()); + auto& column_ref = block.get_by_position(variant_pos[i]).column->assume_mutable_ref(); + auto& column = + column_ref.is_nullable() + ? 
assert_cast( + assert_cast(column_ref).get_nested_column()) + : assert_cast(column_ref); // Make sure the root node is jsonb storage type auto expected_root_type = make_nullable(std::make_shared()); column.ensure_root_node_type(expected_root_type); @@ -388,4 +494,11 @@ Status extract(ColumnPtr source, const PathInData& path, MutableColumnPtr& dst) } // --------------------------- +std::string dump_column(DataTypePtr type, const ColumnPtr& col) { + Block tmp; + tmp.insert(ColumnWithTypeAndName {col, type, col->get_name()}); + return tmp.dump_data(0, tmp.rows()); +} +// --------------------------- + } // namespace doris::vectorized::schema_util diff --git a/be/src/vec/common/schema_util.h b/be/src/vec/common/schema_util.h index bdf43adaac..d5d01b57ed 100644 --- a/be/src/vec/common/schema_util.h +++ b/be/src/vec/common/schema_util.h @@ -75,16 +75,18 @@ struct ExtraInfo { int32_t parent_unique_id = -1; vectorized::PathInData path_info; }; -void get_column_by_type(const vectorized::DataTypePtr& data_type, const std::string& name, - TabletColumn& column, const ExtraInfo& ext_info); + +TabletColumn get_column_by_type(const vectorized::DataTypePtr& data_type, const std::string& name, + const ExtraInfo& ext_info); TabletColumn get_least_type_column(const TabletColumn& original, const DataTypePtr& new_type, const ExtraInfo& ext_info, bool* changed); -// Two steps to parse variant columns into flatterned columns +// thread steps to parse and encode variant columns into flatterned columns // 1. parse variant from raw json string // 2. finalize variant column to each subcolumn least commn types, default ignore sparse sub columns // 2. 
encode sparse sub columns +Status parse_and_encode_variant_columns(Block& block, const std::vector& variant_pos); void parse_variant_columns(Block& block, const std::vector& variant_pos); void finalize_variant_columns(Block& block, const std::vector& variant_pos, bool ignore_sparse = true); @@ -92,16 +94,22 @@ void encode_variant_sparse_subcolumns(Block& block, const std::vector& vari // Pick the tablet schema with the highest schema version as the reference. // Then update all variant columns to there least common types. -// Return the final merged schema as common schema -void get_least_common_schema(const std::vector& schemas, - TabletSchemaSPtr& common_schema); +// Return the final merged schema as common schema. +// If base_schema == nullptr then, max schema version tablet schema will be picked as base schema +TabletSchemaSPtr get_least_common_schema(const std::vector& schemas, + const TabletSchemaSPtr& base_schema); // Get least common types for extracted columns which has Path info, // with a speicified variant column's unique id void update_least_common_schema(const std::vector& schemas, TabletSchemaSPtr& common_schema, int32_t variant_col_unique_id); +// inherit index info from it's parent column +void inherit_tablet_index(TabletSchemaSPtr& schema); + // Extract json data from source with path Status extract(ColumnPtr source, const PathInData& path, MutableColumnPtr& dst); +std::string dump_column(DataTypePtr type, const ColumnPtr& col); + } // namespace doris::vectorized::schema_util diff --git a/be/src/vec/core/block.h b/be/src/vec/core/block.h index e125706cbe..6c7fa80cb8 100644 --- a/be/src/vec/core/block.h +++ b/be/src/vec/core/block.h @@ -136,35 +136,6 @@ public: } const ColumnWithTypeAndName& get_by_position(size_t position) const { return data[position]; } - // need exception safety - Status copy_column_data_to_block(doris::vectorized::IColumn* input_col_ptr, - uint16_t* sel_rowid_idx, uint16_t select_size, int block_cid, - size_t batch_size) { - 
// Only the additional deleted filter condition need to materialize column be at the end of the block - // We should not to materialize the column of query engine do not need. So here just return OK. - // Eg: - // `delete from table where a = 10;` - // `select b from table;` - // a column only effective in segment iterator, the block from query engine only contain the b column. - // so the `block_cid >= data.size()` is true - if (block_cid >= data.size()) { - return Status::OK(); - } - - MutableColumnPtr raw_res_ptr = this->get_by_position(block_cid).column->assume_mutable(); - raw_res_ptr->reserve(batch_size); - - // adapt for outer join change column to nullable - if (raw_res_ptr->is_nullable() && !input_col_ptr->is_nullable()) { - auto col_ptr_nullable = - reinterpret_cast(raw_res_ptr.get()); - col_ptr_nullable->get_null_map_column().insert_many_defaults(select_size); - raw_res_ptr = col_ptr_nullable->get_nested_column_ptr(); - } - - return input_col_ptr->filter_by_selector(sel_rowid_idx, select_size, raw_res_ptr); - } - void replace_by_position(size_t position, ColumnPtr&& res) { this->get_by_position(position).column = std::move(res); } diff --git a/be/src/vec/core/columns_with_type_and_name.h b/be/src/vec/core/columns_with_type_and_name.h index e77ec4930b..c70775fcae 100644 --- a/be/src/vec/core/columns_with_type_and_name.h +++ b/be/src/vec/core/columns_with_type_and_name.h @@ -20,12 +20,16 @@ #pragma once +#include +#include #include #include "vec/core/column_with_type_and_name.h" +#include "vec/data_types/data_type.h" namespace doris::vectorized { using ColumnsWithTypeAndName = std::vector; +using NameAndTypePair = std::pair; -} +} // namespace doris::vectorized diff --git a/be/src/vec/data_types/data_type_decimal.h b/be/src/vec/data_types/data_type_decimal.h index fa45539e7e..c8921c4eba 100644 --- a/be/src/vec/data_types/data_type_decimal.h +++ b/be/src/vec/data_types/data_type_decimal.h @@ -178,7 +178,10 @@ public: if constexpr (std::is_same_v, TypeId>) { 
return doris::FieldType::OLAP_FIELD_TYPE_DECIMAL128I; } - __builtin_unreachable(); + if constexpr (std::is_same_v, TypeId>) { + return doris::FieldType::OLAP_FIELD_TYPE_DECIMAL256; + } + return doris::FieldType::OLAP_FIELD_TYPE_DECIMAL; } int64_t get_uncompressed_serialized_bytes(const IColumn& column, diff --git a/be/src/vec/data_types/data_type_factory.cpp b/be/src/vec/data_types/data_type_factory.cpp index 8b86fa1403..fc31712817 100644 --- a/be/src/vec/data_types/data_type_factory.cpp +++ b/be/src/vec/data_types/data_type_factory.cpp @@ -162,7 +162,8 @@ DataTypePtr DataTypeFactory::create_data_type(const TypeDescriptor& col_desc, bo nested = std::make_shared(); break; case TYPE_VARIANT: - return std::make_shared("", true); + nested = std::make_shared("", true); + break; case TYPE_STRING: case TYPE_CHAR: case TYPE_VARCHAR: @@ -302,7 +303,8 @@ DataTypePtr DataTypeFactory::create_data_type(const TypeIndex& type_index, bool nested = std::make_shared(); break; case TypeIndex::VARIANT: - return std::make_shared("", true); + nested = std::make_shared("", true); + break; case TypeIndex::Decimal32: nested = std::make_shared>(BeConsts::MAX_DECIMAL32_PRECISION, 0); break; @@ -400,7 +402,8 @@ DataTypePtr DataTypeFactory::_create_primitive_data_type(const FieldType& type, result = std::make_shared(); break; case FieldType::OLAP_FIELD_TYPE_VARIANT: - return std::make_shared("", true); + result = std::make_shared("", true); + break; case FieldType::OLAP_FIELD_TYPE_JSONB: result = std::make_shared(); break; @@ -479,7 +482,8 @@ DataTypePtr DataTypeFactory::create_data_type(const PColumnMeta& pcolumn) { nested = std::make_shared(); break; case PGenericType::VARIANT: - return std::make_shared("", true); + nested = std::make_shared("", true); + break; case PGenericType::JSONB: nested = std::make_shared(); break; diff --git a/be/src/vec/data_types/data_type_ipv4.h b/be/src/vec/data_types/data_type_ipv4.h index 4f0c44dfaf..d2bd3e487c 100644 --- 
a/be/src/vec/data_types/data_type_ipv4.h +++ b/be/src/vec/data_types/data_type_ipv4.h @@ -25,6 +25,7 @@ #include #include "common/status.h" +#include "olap/olap_common.h" #include "runtime/define_primitive_type.h" #include "vec/core/types.h" #include "vec/data_types/data_type.h" @@ -47,6 +48,10 @@ public: const char* get_family_name() const override { return "IPv4"; } std::string do_get_name() const override { return "IPv4"; } + doris::FieldType get_storage_field_type() const override { + return doris::FieldType::OLAP_FIELD_TYPE_IPV4; + } + bool equals(const IDataType& rhs) const override; std::string to_string(const IColumn& column, size_t row_num) const override; void to_string(const IColumn& column, size_t row_num, BufferWritable& ostr) const override; diff --git a/be/src/vec/data_types/data_type_ipv6.h b/be/src/vec/data_types/data_type_ipv6.h index b6cf52f7b8..f849dab98a 100755 --- a/be/src/vec/data_types/data_type_ipv6.h +++ b/be/src/vec/data_types/data_type_ipv6.h @@ -45,6 +45,9 @@ namespace doris::vectorized { class DataTypeIPv6 final : public DataTypeNumberBase { public: TypeIndex get_type_id() const override { return TypeIndex::IPv6; } + doris::FieldType get_storage_field_type() const override { + return doris::FieldType::OLAP_FIELD_TYPE_IPV6; + } const char* get_family_name() const override { return "IPv6"; } std::string do_get_name() const override { return "IPv6"; } diff --git a/be/src/vec/data_types/data_type_number_base.h b/be/src/vec/data_types/data_type_number_base.h index 8099ac4ba0..4ec30a3ef9 100644 --- a/be/src/vec/data_types/data_type_number_base.h +++ b/be/src/vec/data_types/data_type_number_base.h @@ -99,6 +99,10 @@ public: } doris::FieldType get_storage_field_type() const override { + // Doris does not support uint8 at present, use uint8 as boolean type + if constexpr (std::is_same_v, TypeId>) { + return doris::FieldType::OLAP_FIELD_TYPE_BOOL; + } if constexpr (std::is_same_v, TypeId>) { return doris::FieldType::OLAP_FIELD_TYPE_TINYINT; } @@ 
-120,6 +124,7 @@ public: if constexpr (std::is_same_v, TypeId>) { return doris::FieldType::OLAP_FIELD_TYPE_DOUBLE; } + LOG(FATAL) << "__builtin_unreachable"; __builtin_unreachable(); } diff --git a/be/src/vec/data_types/data_type_object.cpp b/be/src/vec/data_types/data_type_object.cpp index bfe071475f..c0405242fc 100644 --- a/be/src/vec/data_types/data_type_object.cpp +++ b/be/src/vec/data_types/data_type_object.cpp @@ -47,10 +47,7 @@ namespace doris::vectorized { DataTypeObject::DataTypeObject(const String& schema_format_, bool is_nullable_) : schema_format(to_lower(schema_format_)), is_nullable(is_nullable_) {} bool DataTypeObject::equals(const IDataType& rhs) const { - if (const auto* object = typeid_cast(&rhs)) { - return schema_format == object->schema_format && is_nullable == object->is_nullable; - } - return false; + return typeid_cast(&rhs) != nullptr; } int64_t DataTypeObject::get_uncompressed_serialized_bytes(const IColumn& column, diff --git a/be/src/vec/data_types/serde/data_type_jsonb_serde.cpp b/be/src/vec/data_types/serde/data_type_jsonb_serde.cpp index 5e7902ed6d..b3f67d5e99 100644 --- a/be/src/vec/data_types/serde/data_type_jsonb_serde.cpp +++ b/be/src/vec/data_types/serde/data_type_jsonb_serde.cpp @@ -199,7 +199,7 @@ void DataTypeJsonbSerDe::write_one_cell_to_json(const IColumn& column, rapidjson auto& data = assert_cast(column); const auto jsonb_val = data.get_data_at(row_num); if (jsonb_val.empty()) { - result.SetNull(); + return; } JsonbValue* val = JsonbDocument::createValue(jsonb_val.data, jsonb_val.size); if (val == nullptr) { diff --git a/be/src/vec/exec/scan/new_olap_scan_node.cpp b/be/src/vec/exec/scan/new_olap_scan_node.cpp index af4c780757..be06280d79 100644 --- a/be/src/vec/exec/scan/new_olap_scan_node.cpp +++ b/be/src/vec/exec/scan/new_olap_scan_node.cpp @@ -22,6 +22,7 @@ #include #include #include +#include #include #include @@ -31,6 +32,7 @@ #include #include #include +#include #include "common/config.h" #include "common/logging.h" 
@@ -44,6 +46,7 @@ #include "olap/tablet.h" #include "olap/tablet_manager.h" #include "runtime/decimalv2_value.h" +#include "runtime/define_primitive_type.h" #include "runtime/query_statistics.h" #include "runtime/runtime_state.h" #include "runtime/types.h" @@ -54,9 +57,11 @@ #include "vec/columns/column_const.h" #include "vec/common/string_ref.h" #include "vec/exec/scan/new_olap_scanner.h" +#include "vec/exprs/vcast_expr.h" #include "vec/exprs/vectorized_fn_call.h" #include "vec/exprs/vexpr.h" #include "vec/exprs/vexpr_context.h" +#include "vec/exprs/vslot_ref.h" namespace doris { class DescriptorTbl; @@ -414,6 +419,51 @@ std::string NewOlapScanNode::get_name() { return fmt::format("VNewOlapScanNode({0})", _olap_scan_node.table_name); } +void NewOlapScanNode::_filter_and_collect_cast_type_for_variant( + const VExpr* expr, + phmap::flat_hash_map>& colname_to_cast_types) { + auto* cast_expr = dynamic_cast(expr); + if (cast_expr != nullptr) { + auto* src_slot = cast_expr->get_child(0)->node_type() == TExprNodeType::SLOT_REF + ? 
dynamic_cast(cast_expr->get_child(0).get()) + : nullptr; + if (src_slot == nullptr) { + return; + } + std::vector slots = _output_tuple_desc->slots(); + SlotDescriptor* src_slot_desc = _slot_id_to_slot_desc[src_slot->slot_id()]; + PrimitiveType cast_dst_type = + cast_expr->get_target_type()->get_type_as_type_descriptor().type; + if (src_slot_desc->type().is_variant_type()) { + colname_to_cast_types[src_slot_desc->col_name()].push_back(cast_dst_type); + } + } + for (const auto& child : expr->children()) { + _filter_and_collect_cast_type_for_variant(child.get(), colname_to_cast_types); + } +} + +void NewOlapScanNode::get_cast_types_for_variants() { + phmap::flat_hash_map> colname_to_cast_types; + for (auto it = _conjuncts.begin(); it != _conjuncts.end();) { + auto& conjunct = *it; + if (conjunct->root()) { + _filter_and_collect_cast_type_for_variant(conjunct->root().get(), + colname_to_cast_types); + } + ++it; + } + // cast to one certain type for variant could utilize fully predicates performance + // when storage layer type equals to cast type + for (const auto& [name, types] : colname_to_cast_types) { + // If cast to multiple types detected, then we should not elimate cast to predicate + // but let the expr to handle such case + if (types.size() == 1) { + _cast_types_for_variants[name] = types[0]; + } + } +} + Status NewOlapScanNode::_init_scanners(std::list* scanners) { if (_scan_ranges.empty()) { _eos = true; diff --git a/be/src/vec/exec/scan/new_olap_scan_node.h b/be/src/vec/exec/scan/new_olap_scan_node.h index 93039c6182..06b68497b5 100644 --- a/be/src/vec/exec/scan/new_olap_scan_node.h +++ b/be/src/vec/exec/scan/new_olap_scan_node.h @@ -102,6 +102,14 @@ protected: void add_filter_info(int id, const PredicateFilterInfo& info); + // For some conjunct there is chance to elimate cast operator + // Eg. 
Variant's sub column could eliminate cast in storage layer if + // cast dst column type equals storage column type + void get_cast_types_for_variants() override; + void _filter_and_collect_cast_type_for_variant( + const VExpr* expr, + phmap::flat_hash_map>& colname_to_cast_types); + private: Status _build_key_ranges_and_filters(); diff --git a/be/src/vec/exec/scan/new_olap_scanner.cpp b/be/src/vec/exec/scan/new_olap_scanner.cpp index 53417039c8..de4aa3e27f 100644 --- a/be/src/vec/exec/scan/new_olap_scanner.cpp +++ b/be/src/vec/exec/scan/new_olap_scanner.cpp @@ -53,6 +53,7 @@ #include "service/backend_options.h" #include "util/doris_metrics.h" #include "util/runtime_profile.h" +#include "vec/common/schema_util.h" #include "vec/core/block.h" #include "vec/exec/scan/new_olap_scan_node.h" #include "vec/exec/scan/vscan_node.h" @@ -143,7 +144,8 @@ Status NewOlapScanner::init() { _parent ? parent->_olap_scan_node : local_state->olap_scan_node(); if (olap_scan_node.__isset.schema_version && olap_scan_node.__isset.columns_desc && !olap_scan_node.columns_desc.empty() && - olap_scan_node.columns_desc[0].col_unique_id >= 0) { + olap_scan_node.columns_desc[0].col_unique_id >= 0 && + tablet->tablet_schema()->num_variant_columns() == 0) { schema_key = SchemaCache::get_schema_key( tablet->tablet_id(), olap_scan_node.columns_desc, olap_scan_node.schema_version, SchemaCache::Type::TABLET_SCHEMA); @@ -256,6 +258,7 @@ Status NewOlapScanner::_init_tablet_reader_params( push_down_agg_type != TPushAggOp::COUNT_ON_INDEX); } + RETURN_IF_ERROR(_init_variant_columns()); RETURN_IF_ERROR(_init_return_columns()); _tablet_reader_params.reader_type = ReaderType::READER_QUERY; @@ -277,7 +280,9 @@ Status NewOlapScanner::_init_tablet_reader_params( _tablet_reader_params.output_columns = _parent ? ((NewOlapScanNode*)_parent)->_maybe_read_column_ids : ((pipeline::OlapScanLocalState*)_local_state)->_maybe_read_column_ids; - + _tablet_reader_params.target_cast_type_for_variants = + _parent ? 
((NewOlapScanNode*)_parent)->_cast_types_for_variants + : ((pipeline::OlapScanLocalState*)_local_state)->_cast_types_for_variants; // Condition for (auto& filter : filters) { _tablet_reader_params.conditions.push_back(filter); @@ -406,6 +411,47 @@ Status NewOlapScanner::_init_tablet_reader_params( return Status::OK(); } +vectorized::PathInData NewOlapScanner::_build_path(SlotDescriptor* slot, + const std::string& root_name) { + PathInDataBuilder path_builder; + path_builder.append(root_name, false); + for (const std::string& path : slot->column_paths()) { + path_builder.append(path, false); + } + return path_builder.build(); +} + +Status NewOlapScanner::_init_variant_columns() { + auto& tablet_schema = _tablet_reader_params.tablet_schema; + // Parent column has path info to distinction from each other + for (auto slot : _output_tuple_desc->slots()) { + if (!slot->is_materialized()) { + continue; + } + if (!slot->need_materialize()) { + continue; + } + if (slot->type().is_variant_type()) { + // Such columns are not exist in frontend schema info, so we need to + // add them into tablet_schema for later column indexing. 
+ TabletColumn subcol; + subcol.set_type(FieldType::OLAP_FIELD_TYPE_VARIANT); + subcol.set_is_nullable(true); + subcol.set_unique_id(-1); + subcol.set_parent_unique_id(slot->col_unique_id()); + PathInData path = _build_path( + slot, tablet_schema->column_by_uid(slot->col_unique_id()).name_lower_case()); + subcol.set_path_info(path); + subcol.set_name(path.get_path()); + if (tablet_schema->field_index(path) < 0) { + tablet_schema->append_column(subcol, TabletSchema::ColumnType::VARIANT); + } + } + schema_util::inherit_tablet_index(tablet_schema); + } + return Status::OK(); +} + Status NewOlapScanner::_init_return_columns() { for (auto* slot : _output_tuple_desc->slots()) { if (!slot->is_materialized()) { @@ -414,10 +460,17 @@ Status NewOlapScanner::_init_return_columns() { if (!slot->need_materialize()) { continue; } + + // variant column using path to index a column + int32_t index = 0; auto& tablet_schema = _tablet_reader_params.tablet_schema; - int32_t index = slot->col_unique_id() >= 0 - ? tablet_schema->field_index(slot->col_unique_id()) - : tablet_schema->field_index(slot->col_name()); + if (slot->type().is_variant_type()) { + index = tablet_schema->field_index(_build_path( + slot, tablet_schema->column_by_uid(slot->col_unique_id()).name_lower_case())); + } else { + index = slot->col_unique_id() >= 0 ? 
tablet_schema->field_index(slot->col_unique_id()) + : tablet_schema->field_index(slot->col_name()); + } if (index < 0) { return Status::InternalError( diff --git a/be/src/vec/exec/scan/new_olap_scanner.h b/be/src/vec/exec/scan/new_olap_scanner.h index e4864b67f4..36e6475fca 100644 --- a/be/src/vec/exec/scan/new_olap_scanner.h +++ b/be/src/vec/exec/scan/new_olap_scanner.h @@ -93,6 +93,8 @@ private: const std::vector& function_filters); [[nodiscard]] Status _init_return_columns(); + vectorized::PathInData _build_path(SlotDescriptor* slot, const std::string& root_name); + [[nodiscard]] Status _init_variant_columns(); std::vector _key_ranges; diff --git a/be/src/vec/exec/scan/scanner_scheduler.cpp b/be/src/vec/exec/scan/scanner_scheduler.cpp index 4d798b747e..b3cfc2e48a 100644 --- a/be/src/vec/exec/scan/scanner_scheduler.cpp +++ b/be/src/vec/exec/scan/scanner_scheduler.cpp @@ -360,7 +360,6 @@ void ScannerScheduler::_scanner_scan(ScannerScheduler* scheduler, ScannerContext BlockUPtr block = ctx->get_free_block(); status = scanner->get_block(state, block.get(), &eos); - VLOG_ROW << "VScanNode input rows: " << block->rows() << ", eos: " << eos; // The VFileScanner for external table may try to open not exist files, // Because FE file cache for external table may out of date. // So, NOT_FOUND for VFileScanner is not a fail case. @@ -371,6 +370,7 @@ void ScannerScheduler::_scanner_scan(ScannerScheduler* scheduler, ScannerContext LOG(WARNING) << "Scan thread read VScanner failed: " << status.to_string(); break; } + VLOG_ROW << "VScanNode input rows: " << block->rows() << ", eos: " << eos; if (status.is()) { // The only case in this "if" branch is external table file delete and fe cache has not been updated yet. // Set status to OK. 
diff --git a/be/src/vec/exec/scan/vscan_node.cpp b/be/src/vec/exec/scan/vscan_node.cpp index 305bafe10f..42d9b227fe 100644 --- a/be/src/vec/exec/scan/vscan_node.cpp +++ b/be/src/vec/exec/scan/vscan_node.cpp @@ -25,6 +25,7 @@ #include #include +#include #include #include #include @@ -37,6 +38,7 @@ #include "exprs/bloom_filter_func.h" #include "exprs/hybrid_set.h" #include "exprs/runtime_filter.h" +#include "runtime/define_primitive_type.h" #include "runtime/descriptors.h" #include "runtime/exec_env.h" #include "runtime/primitive_type.h" @@ -78,6 +80,11 @@ static bool ignore_cast(SlotDescriptor* slot, VExpr* expr) { if (slot->type().is_string_type() && expr->type().is_string_type()) { return true; } + // Variant slot cast could be eliminated + // We could use predicate to speed up query, so ignore cast to build predicate + if (slot->type().is_variant_type()) { + return true; + } if (slot->type().is_array_type()) { if (slot->type().children[0].type == expr->type().type) { return true; @@ -362,24 +369,14 @@ Status VScanNode::_normalize_conjuncts() { // The conjuncts is always on output tuple, so use _output_tuple_desc; std::vector slots = _output_tuple_desc->slots(); - for (int slot_idx = 0; slot_idx < slots.size(); ++slot_idx) { - _colname_to_slot_id[slots[slot_idx]->col_name()] = slots[slot_idx]->id(); - - auto type = slots[slot_idx]->type().type; - if (slots[slot_idx]->type().type == TYPE_ARRAY) { - type = slots[slot_idx]->type().children[0].type; - if (type == TYPE_ARRAY) { - continue; - } - } + auto init_value_range = [&](SlotDescriptor* slot, PrimitiveType type) { switch (type) { -#define M(NAME) \ - case TYPE_##NAME: { \ - ColumnValueRange range( \ - slots[slot_idx]->col_name(), slots[slot_idx]->is_nullable(), \ - slots[slot_idx]->type().precision, slots[slot_idx]->type().scale); \ - _slot_id_to_value_range[slots[slot_idx]->id()] = std::pair {slots[slot_idx], range}; \ - break; \ +#define M(NAME) \ + case TYPE_##NAME: { \ + ColumnValueRange 
range(slot->col_name(), slot->is_nullable(), \ + slot->type().precision, slot->type().scale); \ + _slot_id_to_value_range[slot->id()] = std::pair {slot, range}; \ + break; \ } #define APPLY_FOR_PRIMITIVE_TYPE(M) \ M(TINYINT) \ @@ -404,11 +401,29 @@ Status VScanNode::_normalize_conjuncts() { APPLY_FOR_PRIMITIVE_TYPE(M) #undef M default: { - VLOG_CRITICAL << "Unsupported Normalize Slot [ColName=" << slots[slot_idx]->col_name() - << "]"; + VLOG_CRITICAL << "Unsupported Normalize Slot [ColName=" << slot->col_name() << "]"; break; } } + }; + + for (int slot_idx = 0; slot_idx < slots.size(); ++slot_idx) { + _colname_to_slot_id[slots[slot_idx]->col_name()] = slots[slot_idx]->id(); + _slot_id_to_slot_desc[slots[slot_idx]->id()] = slots[slot_idx]; + + auto type = slots[slot_idx]->type().type; + if (slots[slot_idx]->type().type == TYPE_ARRAY) { + type = slots[slot_idx]->type().children[0].type; + if (type == TYPE_ARRAY) { + continue; + } + } + init_value_range(slots[slot_idx], slots[slot_idx]->type().type); + } + + get_cast_types_for_variants(); + for (const auto& [colname, type] : _cast_types_for_variants) { + init_value_range(_slot_id_to_slot_desc[_colname_to_slot_id[colname]], type); } for (auto it = _conjuncts.begin(); it != _conjuncts.end();) { @@ -494,6 +509,14 @@ Status VScanNode::_normalize_predicate(const VExprSPtr& conjunct_expr_root, VExp output_expr = nullptr; return Status::OK(); } + std::shared_ptr slotref; + for (const auto& child : cur_expr->children()) { + if (VExpr::expr_without_cast(child)->node_type() != TExprNodeType::SLOT_REF) { + // not a slot ref(column) + continue; + } + slotref = std::dynamic_pointer_cast(VExpr::expr_without_cast(child)); + } if (_is_predicate_acting_on_slot(cur_expr, in_predicate_checker, &slot, &range) || _is_predicate_acting_on_slot(cur_expr, eq_predicate_checker, &slot, &range)) { Status status = Status::OK(); @@ -553,6 +576,14 @@ Status VScanNode::_normalize_predicate(const VExprSPtr& conjunct_expr_root, VExp return 
Status::OK(); } + if (pdt == PushDownType::ACCEPTABLE && slotref != nullptr && + slotref->type().is_variant_type()) { + // remaining it in the expr tree, in order to filter by function if the pushdown + // predicate is not applied + output_expr = conjunct_expr_root; // remaining in conjunct tree + return Status::OK(); + } + if (pdt == PushDownType::ACCEPTABLE && (_is_key_column(slot->col_name()) || _storage_no_merge())) { output_expr = nullptr; diff --git a/be/src/vec/exec/scan/vscan_node.h b/be/src/vec/exec/scan/vscan_node.h index 7ef0656956..187381f450 100644 --- a/be/src/vec/exec/scan/vscan_node.h +++ b/be/src/vec/exec/scan/vscan_node.h @@ -244,6 +244,11 @@ protected: Status _prepare_scanners(const int query_parallel_instance_num); + // For some conjunct there is chance to elimate cast operator + // Eg. Variant's sub column could eliminate cast in storage layer if + // cast dst column type equals storage column type + virtual void get_cast_types_for_variants() {} + bool _is_pipeline_scan = false; bool _shared_scan_opt = false; @@ -270,10 +275,15 @@ protected: // Save all function predicates which may be pushed down to data source. std::vector _push_down_functions; + // colname -> cast dst type + std::map _cast_types_for_variants; + // slot id -> ColumnValueRange // Parsed from conjuncts phmap::flat_hash_map> _slot_id_to_value_range; + // slot id -> SlotDescriptor + phmap::flat_hash_map _slot_id_to_slot_desc; // column -> ColumnValueRange // We use _colname_to_value_range to store a column and its conresponding value ranges. 
std::unordered_map _colname_to_value_range; diff --git a/be/src/vec/exprs/vcast_expr.cpp b/be/src/vec/exprs/vcast_expr.cpp index 361833120b..47733a177d 100644 --- a/be/src/vec/exprs/vcast_expr.cpp +++ b/be/src/vec/exprs/vcast_expr.cpp @@ -79,6 +79,10 @@ doris::Status VCastExpr::prepare(doris::RuntimeState* state, const doris::RowDes return Status::OK(); } +const DataTypePtr& VCastExpr::get_target_type() const { + return _target_data_type; +} + doris::Status VCastExpr::open(doris::RuntimeState* state, VExprContext* context, FunctionContext::FunctionStateScope scope) { for (int i = 0; i < _children.size(); ++i) { diff --git a/be/src/vec/exprs/vcast_expr.h b/be/src/vec/exprs/vcast_expr.h index b031c819dc..3c03cb42ff 100644 --- a/be/src/vec/exprs/vcast_expr.h +++ b/be/src/vec/exprs/vcast_expr.h @@ -20,6 +20,7 @@ #include "common/object_pool.h" #include "common/status.h" +#include "runtime/define_primitive_type.h" #include "udf/udf.h" #include "vec/aggregate_functions/aggregate_function.h" #include "vec/data_types/data_type.h" @@ -50,6 +51,7 @@ public: void close(VExprContext* context, FunctionContext::FunctionStateScope scope) override; const std::string& expr_name() const override; std::string debug_string() const override; + const DataTypePtr& get_target_type() const; private: FunctionBasePtr _function; diff --git a/be/src/vec/functions/function_cast.h b/be/src/vec/functions/function_cast.h index 5b355cc266..1a24595b2d 100644 --- a/be/src/vec/functions/function_cast.h +++ b/be/src/vec/functions/function_cast.h @@ -2047,8 +2047,13 @@ private: {0}, 1, input_rows_count); } } else { - // Could not cast to any other types when it hierarchical like '{"a" : 1}' - if (!data_type_to->is_nullable() && !WhichDataType(data_type_to).is_string()) { + if (variant.empty()) { + // TODO not found root cause, a tmp fix + col_to->assume_mutable()->insert_many_defaults(input_rows_count); + col_to = make_nullable(col_to, true); + } else if (!data_type_to->is_nullable() && + 
!WhichDataType(data_type_to).is_string()) { + // Could not cast to any other types when it hierarchical like '{"a" : 1}' // TODO we should convert as many as possible here, for examle // this variant column's root is a number column, to convert to number column // is also acceptable @@ -2086,7 +2091,6 @@ private: // set variant root column/type to from column/type auto variant = ColumnObject::create(true /*always nullable*/); variant->create_root(from_type, col_from->assume_mutable()); - block.replace_by_position(result, std::move(variant)); return Status::OK(); } @@ -2258,11 +2262,6 @@ private: } bool skip_not_null_check = false; - if (from_nested->is_nullable() && WhichDataType(to_type).is_variant_type()) { - /// Disable check for variant. Will check that column doesn't contain NULL in wrapper below. - skip_not_null_check = true; - } - auto wrapper = prepare_remove_nullable(context, from_nested, to_nested, skip_not_null_check); diff --git a/be/src/vec/json/json_parser.cpp b/be/src/vec/json/json_parser.cpp index 9f89f1b861..515cd44757 100644 --- a/be/src/vec/json/json_parser.cpp +++ b/be/src/vec/json/json_parser.cpp @@ -96,7 +96,7 @@ void JSONDataParser::traverse(const Element& element, } else if (element.isArray()) { has_nested = false; checkHasNested(element); - if (has_nested && !parse_nested && !config::enable_flatten_nested_for_variant) { + if (has_nested && !parse_nested && !config::variant_enable_flatten_nested) { // Parse nested arrays to JsonbField JsonbWriter writer; traverseArrayAsJsonb(element.getArray(), writer); diff --git a/be/src/vec/olap/olap_data_convertor.cpp b/be/src/vec/olap/olap_data_convertor.cpp index cc879d24eb..2820cca53c 100644 --- a/be/src/vec/olap/olap_data_convertor.cpp +++ b/be/src/vec/olap/olap_data_convertor.cpp @@ -1061,16 +1061,24 @@ Status OlapBlockDataConvertor::OlapColumnDataConvertorMap::convert_to_olap( void OlapBlockDataConvertor::OlapColumnDataConvertorVariant::set_source_column( const ColumnWithTypeAndName& typed_column, 
size_t row_pos, size_t num_rows) { // set - auto variant = assert_cast(*typed_column.column); - if (!variant.is_finalized()) { - variant.finalize(); + const ColumnNullable* nullable_column = nullptr; + if (typed_column.column->is_nullable()) { + nullable_column = assert_cast(typed_column.column.get()); + _nullmap = nullable_column->get_null_map_data().data(); } - auto root = variant.get_root(); - auto nullable = assert_cast(root.get()); + const auto& variant = + nullable_column == nullptr + ? assert_cast(*typed_column.column) + : assert_cast( + nullable_column->get_nested_column()); + + const_cast(variant).finalize_if_not(); + auto root_of_variant = variant.get_root(); + auto nullable = assert_cast(root_of_variant.get()); CHECK(nullable); _root_data_column = assert_cast(&nullable->get_nested_column()); - _nullmap = nullable->get_null_map_data().data(); - _root_data_convertor->set_source_column({root->get_ptr(), nullptr, ""}, row_pos, num_rows); + _root_data_convertor->set_source_column({root_of_variant->get_ptr(), nullptr, ""}, row_pos, + num_rows); OlapBlockDataConvertor::OlapColumnDataConvertorBase::set_source_column(typed_column, row_pos, num_rows); } diff --git a/be/src/vec/olap/olap_data_convertor.h b/be/src/vec/olap/olap_data_convertor.h index 0041d28ca2..979b8d13fd 100644 --- a/be/src/vec/olap/olap_data_convertor.h +++ b/be/src/vec/olap/olap_data_convertor.h @@ -495,7 +495,9 @@ private: const void* get_data_at(size_t offset) const override; private: + // encodes sparsed columns const ColumnString* _root_data_column; + // _nullmap contains null info for this variant std::unique_ptr _root_data_convertor; }; diff --git a/docs/en/docs/admin-manual/config/be-config.md b/docs/en/docs/admin-manual/config/be-config.md index 2250c06115..cccd7b9ff1 100644 --- a/docs/en/docs/admin-manual/config/be-config.md +++ b/docs/en/docs/admin-manual/config/be-config.md @@ -1464,11 +1464,6 @@ Indicates how many tablets failed to load in the data directory. 
At the same tim * Description: Default dirs to put jdbc drivers. * Default value: `${DORIS_HOME}/jdbc_drivers` -#### `enable_parse_multi_dimession_array` - -* Description: Whether parse multidimensional array, if false encountering will return ERROR -* Default value: true - #### `enable_simdjson_reader` * Description: Whether enable simdjson to parse json while stream load diff --git a/docs/zh-CN/docs/admin-manual/config/be-config.md b/docs/zh-CN/docs/admin-manual/config/be-config.md index 586958e3a6..058b66ef71 100644 --- a/docs/zh-CN/docs/admin-manual/config/be-config.md +++ b/docs/zh-CN/docs/admin-manual/config/be-config.md @@ -1493,11 +1493,6 @@ load tablets from header failed, failed tablets size: xxx, path=xxx * 描述: 存放 jdbc driver 的默认目录。 * 默认值: `${DORIS_HOME}/jdbc_drivers` -#### `enable_parse_multi_dimession_array` - -* 描述: 在动态表中是否解析多维数组,如果是false遇到多维数组则会报错。 -* 默认值: true - #### `enable_simdjson_reader` * 描述: 是否在导入json数据时用simdjson来解析。 diff --git a/fe/fe-core/src/main/cup/sql_parser.cup b/fe/fe-core/src/main/cup/sql_parser.cup index b47880fe92..3a7b5b1b5f 100644 --- a/fe/fe-core/src/main/cup/sql_parser.cup +++ b/fe/fe-core/src/main/cup/sql_parser.cup @@ -440,6 +440,7 @@ terminal String KW_JOIN, KW_JSON, KW_JSONB, + KW_VARIANT, KW_KEY, KW_KEYS, KW_KILL, @@ -766,6 +767,7 @@ nonterminal String sequence_col_clause; nonterminal Predicate predicate, between_predicate, comparison_predicate, compound_predicate, in_predicate, like_predicate, exists_predicate, match_predicate; nonterminal ArrayList opt_partition_by_clause; +nonterminal ArrayList sub_column_path; nonterminal Expr having_clause; nonterminal ArrayList order_by_elements, order_by_clause; nonterminal OrderByElement order_by_element; @@ -966,6 +968,7 @@ precedence nonassoc KW_WITH; precedence left KW_FULL, KW_MERGE; precedence left DOT; +precedence left COLON; precedence left SET_VAR; precedence left KW_OR; precedence left KW_AND; @@ -6270,6 +6273,8 @@ type ::= {: RESULT = ScalarType.createJsonbType(); :} | 
KW_TEXT {: RESULT = ScalarType.createStringType(); :} + | KW_VARIANT + {: RESULT = ScalarType.createVariantType(); :} | KW_VARCHAR LPAREN INTEGER_LITERAL:len RPAREN {: ScalarType type = ScalarType.createVarcharType(len.intValue()); RESULT = type; @@ -7071,6 +7076,18 @@ compound_predicate ::= {: RESULT = new CompoundPredicate(CompoundPredicate.Operator.NOT, e, null); :} ; +sub_column_path ::= + ident: subcol + {: + RESULT = Lists.newArrayList(subcol); + :} + | sub_column_path:list DOT ident:subcol + {: + list.add(subcol); + RESULT = list; + :} + ; + column_ref ::= ident:col {: RESULT = new SlotRef(null, col); :} @@ -7081,6 +7098,16 @@ column_ref ::= {: RESULT = new SlotRef(new TableName(null, db, tbl), col); :} | ident:ctl DOT ident:db DOT ident:tbl DOT ident:col {: RESULT = new SlotRef(new TableName(ctl, db, tbl), col); :} + + | ident:pcol COLON sub_column_path:lables + {: RESULT = new SlotRef(null, pcol, lables); :} + // table_name:tblName DOT IDENT:pcol causes reduce/reduce conflicts + | ident:tbl DOT ident:pcol COLON sub_column_path:lables + {: RESULT = new SlotRef(new TableName(null, null, tbl), pcol, lables); :} + | ident:db DOT ident:tbl DOT ident:pcol COLON sub_column_path:lables + {: RESULT = new SlotRef(new TableName(null, db, tbl), pcol, lables); :} + | ident:ctl DOT ident:db DOT ident:tbl DOT ident:pcol COLON sub_column_path:lables + {: RESULT = new SlotRef(new TableName(ctl, db, tbl), pcol, lables); :} ; column_ref_list ::= diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/Analyzer.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/Analyzer.java index 99c436aeb1..4e648bf1db 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/Analyzer.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/Analyzer.java @@ -91,6 +91,7 @@ import java.util.ArrayList; import java.util.Calendar; import java.util.Collection; import java.util.Collections; +import java.util.Comparator; import java.util.HashSet; import 
java.util.IdentityHashMap; import java.util.Iterator; @@ -124,6 +125,11 @@ public class Analyzer { // map from lowercase qualified column name ("alias.col") to descriptor private final Map slotRefMap = Maps.newTreeMap(String.CASE_INSENSITIVE_ORDER); + // Notice: it's case sensitive + // Variant column name -> Paths of sub columns + private final Map, SlotDescriptor>> subColumnSlotRefMap + = Maps.newTreeMap(String.CASE_INSENSITIVE_ORDER); + // map from tuple id to list of conjuncts referencing tuple private final Map> tuplePredicates = Maps.newHashMap(); // map from slot id to list of conjuncts referencing slot @@ -922,7 +928,8 @@ public class Analyzer { * @param colName * @throws AnalysisException */ - public SlotDescriptor registerColumnRef(TableName tblName, String colName) throws AnalysisException { + public SlotDescriptor registerColumnRef(TableName tblName, String colName, List subColNames) + throws AnalysisException { TupleDescriptor d; TableName newTblName = tblName; if (newTblName == null) { @@ -1004,11 +1011,55 @@ public class Analyzer { newTblName == null ? d.getTable().getName() : newTblName.toString()); } + LOG.debug("register column ref table {}, colName {}, col {}", tblName, colName, col.toSql()); + if (col.getType().isVariantType() || (subColNames != null && !subColNames.isEmpty())) { + if (!col.getType().isVariantType()) { + ErrorReport.reportAnalysisException(ErrorCode.ERR_ILLEGAL_COLUMN_REFERENCE_ERROR, + Joiner.on(".").join(tblName.getTbl(), colName)); + } + if (subColNames == null) { + // Root + subColNames = new ArrayList(); + } + String key = d.getAlias() + "." 
+ col.getName(); + if (subColumnSlotRefMap.get(key) == null) { + subColumnSlotRefMap.put(key, Maps.newTreeMap( + new Comparator>() { + public int compare(List lst1, List lst2) { + Iterator it1 = lst1.iterator(); + Iterator it2 = lst2.iterator(); + while (it1.hasNext() && it2.hasNext()) { + int result = it1.next().compareTo(it2.next()); + if (result != 0) { + return result; + } + } + return Integer.compare(lst1.size(), lst2.size()); + } + })); + } + SlotDescriptor result = subColumnSlotRefMap.get(key).get(subColNames); + if (result != null) { + // avoid duplicate slots + return result; + } + result = globalState.descTbl.addSlotDescriptor(d); + LOG.debug("register slot descriptor {}", result); + result.setSubColLables(subColNames); + result.setColumn(col); + if (!subColNames.isEmpty()) { + result.setMaterializedColumnName(col.getName() + "." + String.join(".", subColNames)); + } + result.setIsMaterialized(true); + result.setIsNullable(col.isAllowNull()); + subColumnSlotRefMap.get(key).put(subColNames, result); + return result; + } + // Make column name case insensitive String key = d.getAlias() + "." 
+ col.getName(); SlotDescriptor result = slotRefMap.get(key); if (result != null) { - result.setMultiRef(true); return result; } result = globalState.descTbl.addSlotDescriptor(d); @@ -1032,7 +1083,6 @@ public class Analyzer { String key = colName; SlotDescriptor result = slotRefMap.get(key); if (result != null) { - result.setMultiRef(true); return result; } result = addSlotDescriptor(tupleDescriptor); @@ -1110,6 +1160,7 @@ public class Analyzer { result.setStats(srcSlotDesc.getStats()); result.setType(srcSlotDesc.getType()); result.setIsNullable(srcSlotDesc.getIsNullable()); + result.setSubColLables(srcSlotDesc.getSubColLables()); if (srcSlotDesc.getColumn() != null) { result.setColumn(srcSlotDesc.getColumn()); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/Expr.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/Expr.java index 26f75d460a..fc4e46d314 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/Expr.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/Expr.java @@ -1019,6 +1019,10 @@ public abstract class Expr extends TreeNode implements ParseNode, Cloneabl return toSql(); } + public List toSubColumnLabel() { + return Lists.newArrayList(); + } + // Convert this expr, including all children, to its Thrift representation. 
public TExpr treeToThrift() { TExpr result = new TExpr(); diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/IndexDef.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/IndexDef.java index ab1584c5f7..f0ae827bc5 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/IndexDef.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/IndexDef.java @@ -219,7 +219,8 @@ public class IndexDef { colType = ((ArrayType) column.getType()).getItemType().getPrimitiveType(); } if (!(colType.isDateType() || colType.isDecimalV2Type() || colType.isDecimalV3Type() - || colType.isFixedPointType() || colType.isStringType() || colType == PrimitiveType.BOOLEAN)) { + || colType.isFixedPointType() || colType.isStringType() || colType == PrimitiveType.BOOLEAN + || colType.isVariantType())) { throw new AnalysisException(colType + " is not supported in " + indexType.toString() + " index. " + "invalid column: " + indexColName); } else if (indexType == IndexType.INVERTED diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/InlineViewRef.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/InlineViewRef.java index 4274ed37f3..d331e090b1 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/InlineViewRef.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/InlineViewRef.java @@ -56,6 +56,7 @@ public class InlineViewRef extends TableRef { // and column labels used in the query definition. Either all or none of the column // labels must be overridden. 
private List explicitColLabels; + private List> explicitSubColPath; // /////////////////////////////////////// // BEGIN: Members that need to be reset() @@ -97,6 +98,7 @@ public class InlineViewRef extends TableRef { public InlineViewRef(String alias, QueryStmt queryStmt, List colLabels) { this(alias, queryStmt); explicitColLabels = Lists.newArrayList(colLabels); + LOG.debug("inline view explicitColLabels {}", explicitColLabels); } /** @@ -153,6 +155,12 @@ public class InlineViewRef extends TableRef { return queryStmt.getColLabels(); } + public List> getSubColPath() { + if (explicitSubColPath != null) { + return explicitSubColPath; + } + return queryStmt.getSubColPath(); + } @Override public void reset() { @@ -227,9 +235,12 @@ public class InlineViewRef extends TableRef { // TODO: relax this a bit by allowing propagation out of the inline view (but // not into it) List slots = analyzer.changeSlotToNullableOfOuterJoinedTuples(); + LOG.debug("inline view query {}", queryStmt.toSql()); for (int i = 0; i < getColLabels().size(); ++i) { String colName = getColLabels().get(i); - SlotDescriptor slotDesc = analyzer.registerColumnRef(getAliasAsName(), colName); + LOG.debug("inline view register {}", colName); + SlotDescriptor slotDesc = analyzer.registerColumnRef(getAliasAsName(), + colName, getSubColPath().get(i)); Expr colExpr = queryStmt.getResultExprs().get(i); if (queryStmt instanceof SelectStmt && ((SelectStmt) queryStmt).getValueList() != null) { ValueList valueList = ((SelectStmt) queryStmt).getValueList(); diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/InvertedIndexUtil.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/InvertedIndexUtil.java index 4196f774e2..e6fcefb7e0 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/InvertedIndexUtil.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/InvertedIndexUtil.java @@ -105,7 +105,7 @@ public class InvertedIndexUtil { parser = INVERTED_INDEX_PARSER_NONE; } - if 
(colType.isStringType()) { + if (colType.isStringType() || colType.isVariantType()) { if (!(parser.equals(INVERTED_INDEX_PARSER_NONE) || parser.equals(INVERTED_INDEX_PARSER_STANDARD) || parser.equals(INVERTED_INDEX_PARSER_UNICODE) diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/QueryStmt.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/QueryStmt.java index 3850f594c9..f620d1ba94 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/QueryStmt.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/QueryStmt.java @@ -591,6 +591,8 @@ public abstract class QueryStmt extends StatementBase implements Queriable { */ public abstract ArrayList getColLabels(); + public abstract ArrayList> getSubColPath(); + /** * Returns the materialized tuple ids of the output of this stmt. * Used in case this stmt is part of an @InlineViewRef, diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/SelectListItem.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/SelectListItem.java index 34368c7b17..f0a841df57 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/SelectListItem.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/SelectListItem.java @@ -22,6 +22,8 @@ package org.apache.doris.analysis; import com.google.common.base.Preconditions; +import java.util.List; + public class SelectListItem { private Expr expr; // for "[name.]*" @@ -149,6 +151,12 @@ public class SelectListItem { return "__" + expr.getExprName() + "_" + position; } + public List toSubColumnLabels() { + Preconditions.checkState(!isStar()); + return expr.toSubColumnLabel(); + } + + public void setAlias(String alias) { this.alias = alias; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/SelectStmt.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/SelectStmt.java index bb10d22080..1a84a24708 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/SelectStmt.java +++ 
b/fe/fe-core/src/main/java/org/apache/doris/analysis/SelectStmt.java @@ -90,6 +90,7 @@ public class SelectStmt extends QueryStmt { protected SelectList selectList; private final ArrayList colLabels; // lower case column labels + private final ArrayList> subColPath; // case insensitive column labels protected FromClause fromClause; protected GroupByClause groupByClause; private List originalExpr; @@ -145,6 +146,7 @@ public class SelectStmt extends QueryStmt { this.selectList = new SelectList(); this.fromClause = new FromClause(); this.colLabels = Lists.newArrayList(); + this.subColPath = Lists.newArrayList(); } public SelectStmt( @@ -171,6 +173,7 @@ public class SelectStmt extends QueryStmt { this.havingClause = havingPredicate; this.colLabels = Lists.newArrayList(); + this.subColPath = Lists.newArrayList(); this.havingPred = null; this.aggInfo = null; this.sortInfo = null; @@ -191,6 +194,7 @@ public class SelectStmt extends QueryStmt { other.havingClauseAfterAnalyzed != null ? other.havingClauseAfterAnalyzed.clone() : null; colLabels = Lists.newArrayList(other.colLabels); + subColPath = Lists.newArrayList(other.subColPath); aggInfo = (other.aggInfo != null) ? other.aggInfo.clone() : null; analyticInfo = (other.analyticInfo != null) ? other.analyticInfo.clone() : null; sqlString = (other.sqlString != null) ? 
other.sqlString : null; @@ -213,6 +217,7 @@ public class SelectStmt extends QueryStmt { super.reset(); selectList.reset(); colLabels.clear(); + subColPath.clear(); fromClause.reset(); if (whereClause != null) { whereClause.reset(); @@ -368,6 +373,12 @@ public class SelectStmt extends QueryStmt { return colLabels; } + @Override + public ArrayList> getSubColPath() { + return subColPath; + } + + public ExprSubstitutionMap getBaseTblSmap() { return baseTblSmap; } @@ -591,6 +602,7 @@ public class SelectStmt extends QueryStmt { } aliasSMap.put(aliasRef, item.getExpr().clone()); colLabels.add(columnLabel); + subColPath.add(item.toSubColumnLabels()); } } } @@ -632,6 +644,7 @@ public class SelectStmt extends QueryStmt { resultExprs.add(rewriteQueryExprByMvColumnExpr(expr, analyzer)); } colLabels.add("col_" + colLabels.size()); + subColPath.add(expr.toSubColumnLabel()); } } // analyze valueList if exists @@ -816,7 +829,8 @@ public class SelectStmt extends QueryStmt { LOG.debug("only support duplicate key or MOW model"); return false; } - if (!olapTable.getEnableLightSchemaChange() || !Strings.isNullOrEmpty(olapTable.getStoragePolicy())) { + if (!olapTable.getEnableLightSchemaChange() || !Strings.isNullOrEmpty(olapTable.getStoragePolicy()) + || olapTable.hasVariantColumns()) { return false; } if (getOrderByElements() != null) { @@ -1240,6 +1254,8 @@ public class SelectStmt extends QueryStmt { slot.setTupleId(desc.getId()); resultExprs.add(rewriteQueryExprByMvColumnExpr(slot, analyzer)); colLabels.add(col.getName()); + // empty sub lables + subColPath.add(Lists.newArrayList()); } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/SetOperationStmt.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/SetOperationStmt.java index 984f2c5822..ac2dafd2f8 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/SetOperationStmt.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/SetOperationStmt.java @@ -797,6 +797,12 @@ public class 
SetOperationStmt extends QueryStmt { return operands.get(0).getQueryStmt().getColLabels(); } + @Override + public ArrayList> getSubColPath() { + Preconditions.checkState(operands.size() > 0); + return operands.get(0).getQueryStmt().getSubColPath(); + } + @Override public void setNeedToSql(boolean needToSql) { super.setNeedToSql(needToSql); diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/SlotDescriptor.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/SlotDescriptor.java index c5291414b1..6bc544a11f 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/SlotDescriptor.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/SlotDescriptor.java @@ -44,6 +44,12 @@ public class SlotDescriptor { // for SlotRef.toSql() in the absence of a path private String label; + // for variant column's sub column lables + private List subColPath; + // materializedColumnName is the target name of a slot + // it could be either column name or a composed name for a variant + // subcolumn like `a.b.c` + private String materializedColumnName; // Expr(s) materialized into this slot; multiple exprs for unions. Should be empty if // path_ is set. 
@@ -64,7 +70,6 @@ public class SlotDescriptor { private ColumnStats stats; // only set if 'column' isn't set private boolean isAgg; - private boolean isMultiRef; // If set to false, then such slots will be ignored during // materialize them.Used to optimize to read less data and less memory usage private boolean needMaterialize = true; @@ -77,7 +82,6 @@ public class SlotDescriptor { this.isMaterialized = false; this.isNullable = true; this.isAgg = false; - this.isMultiRef = false; } public SlotDescriptor(SlotId id, TupleDescriptor parent, SlotDescriptor src) { @@ -95,14 +99,6 @@ public class SlotDescriptor { this.sourceExprs.add(new SlotRef(src)); } - public boolean isMultiRef() { - return isMultiRef; - } - - public void setMultiRef(boolean isMultiRef) { - this.isMultiRef = isMultiRef; - } - public boolean getIsAgg() { return isAgg; } @@ -123,6 +119,14 @@ public class SlotDescriptor { return id; } + public void setSubColLables(List subColPath) { + this.subColPath = subColPath; + } + + public List getSubColLables() { + return this.subColPath; + } + public TupleDescriptor getParent() { return parent; } @@ -212,6 +216,10 @@ public class SlotDescriptor { this.stats = stats; } + public void setMaterializedColumnName(String name) { + this.materializedColumnName = name; + } + public ColumnStats getStats() { if (stats == null) { if (column != null) { @@ -295,9 +303,10 @@ public class SlotDescriptor { public TSlotDescriptor toThrift() { // Non-nullable slots will have 0 for the byte offset and -1 for the bit mask + String colName = materializedColumnName != null ? materializedColumnName : + ((column != null) ? column.getNonShadowName() : ""); TSlotDescriptor tSlotDescriptor = new TSlotDescriptor(id.asInt(), parent.getId().asInt(), type.toThrift(), -1, - byteOffset, 0, getIsNullable() ? 0 : -1, - ((column != null) ? column.getNonShadowName() : ""), slotIdx, + byteOffset, 0, getIsNullable() ? 
0 : -1, colName, slotIdx, isMaterialized); tSlotDescriptor.setNeedMaterialize(needMaterialize); tSlotDescriptor.setIsAutoIncrement(isAutoInc); @@ -308,6 +317,9 @@ public class SlotDescriptor { tSlotDescriptor.setIsKey(column.isKey()); tSlotDescriptor.setColDefaultValue(column.getDefaultValue()); } + if (subColPath != null) { + tSlotDescriptor.setColumnPaths(subColPath); + } return tSlotDescriptor; } @@ -318,7 +330,7 @@ public class SlotDescriptor { return MoreObjects.toStringHelper(this).add("id", id.asInt()).add("parent", parentTupleId).add("col", colStr) .add("type", typeStr).add("materialized", isMaterialized).add("byteSize", byteSize) .add("byteOffset", byteOffset).add("slotIdx", slotIdx).add("nullable", getIsNullable()) - .add("isAutoIncrement", isAutoInc).toString(); + .add("isAutoIncrement", isAutoInc).add("subColPath", subColPath).toString(); } @Override @@ -335,6 +347,7 @@ public class SlotDescriptor { .append(", type=").append(type == null ? "null" : type.toSql()) .append(", nullable=").append(isNullable) .append(", isAutoIncrement=").append(isAutoInc) + .append(", subColPath=").append(subColPath) .append("}") .toString(); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/SlotRef.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/SlotRef.java index ec5221bf6e..62238b4dfb 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/SlotRef.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/SlotRef.java @@ -57,6 +57,7 @@ public class SlotRef extends Expr { private String col; // Used in toSql private String label; + private List subColPath; // results of analysis protected SlotDescriptor desc; @@ -73,6 +74,14 @@ public class SlotRef extends Expr { this.label = "`" + col + "`"; } + public SlotRef(TableName tblName, String col, List subColPath) { + super(); + this.tblName = tblName; + this.col = col; + this.label = "`" + col + "`"; + this.subColPath = subColPath; + } + // C'tor for a "pre-analyzed" ref to slot that doesn't 
correspond to // a table's column. public SlotRef(SlotDescriptor desc) { @@ -86,6 +95,7 @@ public class SlotRef extends Expr { if (this.type.equals(Type.CHAR)) { this.type = Type.VARCHAR; } + this.subColPath = desc.getSubColLables(); analysisDone(); } @@ -109,6 +119,7 @@ public class SlotRef extends Expr { label = other.label; desc = other.desc; tupleId = other.tupleId; + subColPath = other.subColPath; } @Override @@ -202,7 +213,7 @@ public class SlotRef extends Expr { @Override public void analyzeImpl(Analyzer analyzer) throws AnalysisException { - desc = analyzer.registerColumnRef(tblName, col); + desc = analyzer.registerColumnRef(tblName, col, subColPath); type = desc.getType(); if (this.type.equals(Type.CHAR)) { this.type = Type.VARCHAR; @@ -229,6 +240,7 @@ public class SlotRef extends Expr { helper.add("type", type.toSql()); helper.add("label", label); helper.add("tblName", tblName != null ? tblName.toSql() : "null"); + helper.add("subColPath", subColPath); return helper.toString(); } @@ -315,6 +327,10 @@ public class SlotRef extends Expr { return this.exprName.get(); } + public List toSubColumnLabel() { + return subColPath; + } + @Override protected void toThrift(TExprNode msg) { msg.node_type = TExprNodeType.SLOT_REF; @@ -333,7 +349,14 @@ public class SlotRef extends Expr { if (desc != null) { return desc.getId().hashCode(); } - return Objects.hashCode((tblName == null ? "" : tblName.toSql() + "." + label).toLowerCase()); + if (subColPath == null || subColPath.isEmpty()) { + return Objects.hashCode((tblName == null ? "" : tblName.toSql() + "." + label).toLowerCase()); + } + int result = Objects.hashCode((tblName == null ? "" : tblName.toSql() + "." 
+ label).toLowerCase()); + for (String sublabel : subColPath) { + result = 31 * result + Objects.hashCode(sublabel); + } + return result; } @Override @@ -368,6 +391,13 @@ public class SlotRef extends Expr { if (col != null && !col.equalsIgnoreCase(other.col)) { return false; } + if ((subColPath == null) != (other.subColPath == null)) { + return false; + } + if (subColPath != null + && subColPath.equals(other.subColPath)) { + return false; + } return true; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/TupleDescriptor.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/TupleDescriptor.java index e4e130fc6f..f6b74abfd7 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/TupleDescriptor.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/TupleDescriptor.java @@ -177,6 +177,15 @@ public class TupleDescriptor { return null; } + public boolean hasVariantCol() { + for (SlotDescriptor slotDesc : slots) { + if (slotDesc.getColumn() != null && slotDesc.getColumn().getType().isVariantType()) { + return true; + } + } + return false; + } + public TableIf getTable() { return table; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java index 94df98f000..744e6cf9ad 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java @@ -2418,4 +2418,13 @@ public class OlapTable extends Table { public boolean needReadLockWhenPlan() { return true; } + + public boolean hasVariantColumns() { + for (Column column : getBaseSchema()) { + if (column.getType().isVariantType()) { + return true; + } + } + return false; + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/MaterializedViewSelector.java b/fe/fe-core/src/main/java/org/apache/doris/planner/MaterializedViewSelector.java index a06e09e30f..4375105e46 100644 --- 
a/fe/fe-core/src/main/java/org/apache/doris/planner/MaterializedViewSelector.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/MaterializedViewSelector.java @@ -202,6 +202,17 @@ public class MaterializedViewSelector { for (Map.Entry entry : candidateIndexIdToMeta.entrySet()) { result.put(entry.getKey(), entry.getValue().getSchema()); } + // For query like `select v:a from tbl` when column v is variant type but v:a is not expicity + // in index, so the above check will filter all index. But we should at least choose the base + // index at present.TODO we should better handle it. + LOG.debug("result {}, has variant col {}, tuple {}", result, + analyzer.getTupleDesc(scanNode.getTupleId()).hasVariantCol(), + analyzer.getTupleDesc(scanNode.getTupleId()).toString()); + if (result.keySet().size() == 0 && scanNode.getOlapTable() + .getBaseSchema().stream().anyMatch(column -> column.getType().isVariantType())) { + LOG.info("Using base schema"); + result.put(scanNode.getOlapTable().getBaseIndexId(), scanNode.getOlapTable().getBaseSchema()); + } return result; } @@ -577,7 +588,8 @@ public class MaterializedViewSelector { candidateIndexSchema .forEach(column -> indexColumnNames.add(CreateMaterializedViewStmt .mvColumnBreaker(MaterializedIndexMeta.normalizeName(column.getName())))); - + LOG.debug("candidateIndexSchema {}, indexColumnNames {}, queryColumnNames {}", + candidateIndexSchema, indexColumnNames, queryColumnNames); // Rollup index have no define expr. 
if (entry.getValue().getWhereClause() == null && indexExprs.isEmpty() && !indexColumnNames.containsAll(queryColumnNames)) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/service/FrontendServiceImpl.java b/fe/fe-core/src/main/java/org/apache/doris/service/FrontendServiceImpl.java index 621def175e..66751d6a91 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/service/FrontendServiceImpl.java +++ b/fe/fe-core/src/main/java/org/apache/doris/service/FrontendServiceImpl.java @@ -17,9 +17,7 @@ package org.apache.doris.service; -import org.apache.doris.alter.SchemaChangeHandler; import org.apache.doris.analysis.AbstractBackupTableRefClause; -import org.apache.doris.analysis.AddColumnsClause; import org.apache.doris.analysis.AddPartitionClause; import org.apache.doris.analysis.Analyzer; import org.apache.doris.analysis.ColumnDef; @@ -40,7 +38,6 @@ import org.apache.doris.catalog.Column; import org.apache.doris.catalog.Database; import org.apache.doris.catalog.DatabaseIf; import org.apache.doris.catalog.Env; -import org.apache.doris.catalog.Index; import org.apache.doris.catalog.MaterializedIndex; import org.apache.doris.catalog.OlapTable; import org.apache.doris.catalog.Partition; @@ -106,8 +103,6 @@ import org.apache.doris.tablefunction.MetadataGenerator; import org.apache.doris.task.StreamLoadTask; import org.apache.doris.thrift.FrontendService; import org.apache.doris.thrift.FrontendServiceVersion; -import org.apache.doris.thrift.TAddColumnsRequest; -import org.apache.doris.thrift.TAddColumnsResult; import org.apache.doris.thrift.TAutoIncrementRangeRequest; import org.apache.doris.thrift.TAutoIncrementRangeResult; import org.apache.doris.thrift.TBackend; @@ -116,7 +111,6 @@ import org.apache.doris.thrift.TBeginTxnResult; import org.apache.doris.thrift.TBinlog; import org.apache.doris.thrift.TCheckAuthRequest; import org.apache.doris.thrift.TCheckAuthResult; -import org.apache.doris.thrift.TColumn; import org.apache.doris.thrift.TColumnDef; import 
org.apache.doris.thrift.TColumnDesc; import org.apache.doris.thrift.TCommitTxnRequest; @@ -236,15 +230,11 @@ import org.apache.logging.log4j.Logger; import org.apache.thrift.TException; import java.io.StringReader; -import java.time.Instant; -import java.time.ZoneId; -import java.time.ZonedDateTime; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; import java.util.HashMap; import java.util.HashSet; -import java.util.LinkedList; import java.util.List; import java.util.Map; import java.util.Set; @@ -255,7 +245,6 @@ import java.util.concurrent.Executors; import java.util.concurrent.Future; import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeoutException; -import java.util.function.IntSupplier; import java.util.stream.Collectors; // Frontend service used to serve all request for this frontend through @@ -467,156 +456,6 @@ public class FrontendServiceImpl implements FrontendService.Iface { defaultVal, comment, true); } - @Override - public TAddColumnsResult addColumns(TAddColumnsRequest request) throws TException { - String clientAddr = getClientAddrAsString(); - LOG.debug("schema change clientAddr: {}, request: {}", clientAddr, request); - - TStatus status = new TStatus(TStatusCode.OK); - List allColumns = new ArrayList(); - - Env env = Env.getCurrentEnv(); - InternalCatalog catalog = env.getInternalCatalog(); - int schemaVersion = 0; - try { - if (!env.isMaster()) { - status.setStatusCode(TStatusCode.ILLEGAL_STATE); - status.addToErrorMsgs("retry rpc request to master."); - TAddColumnsResult result = new TAddColumnsResult(); - result.setStatus(status); - return result; - } - TableName tableName = new TableName("", request.getDbName(), request.getTableName()); - if (request.getTableId() > 0) { - tableName = catalog.getTableNameByTableId(request.getTableId()); - } - if (tableName == null) { - throw new MetaNotFoundException("table_id " + request.getTableId() + " does not exist"); - } - - Database db = 
catalog.getDbNullable(tableName.getDb()); - if (db == null) { - throw new MetaNotFoundException("db " + tableName.getDb() + " does not exist"); - } - - List addColumns = request.getAddColumns(); - boolean queryMode = false; - if (addColumns == null || addColumns.size() == 0) { - queryMode = true; - } - - // rpc only olap table - OlapTable olapTable = (OlapTable) db.getTableOrMetaException(tableName.getTbl(), TableType.OLAP); - olapTable.writeLockOrMetaException(); - - try { - olapTable.checkNormalStateForAlter(); - List columnDefs = new ArrayList(); - - // prepare columnDefs - for (TColumnDef tColumnDef : addColumns) { - if (request.isAllowTypeConflict()) { - // ignore column with same name - boolean hasSameNameColumn = false; - for (Column column : olapTable.getBaseSchema()) { - if (column.getName().equalsIgnoreCase(tColumnDef.getColumnDesc().getColumnName())) { - hasSameNameColumn = true; - } - } - // ignore this column - if (hasSameNameColumn) { - continue; - } - } - String comment = tColumnDef.getComment(); - if (comment == null || comment.length() == 0) { - Instant ins = Instant.ofEpochSecond(System.currentTimeMillis() / 1000); - ZonedDateTime zdt = ins.atZone(ZoneId.systemDefault()); - comment = "auto change " + zdt.toString(); - } - - TColumnDesc tColumnDesc = tColumnDef.getColumnDesc(); - ColumnDef columnDef = initColumnfromThrift(tColumnDesc, comment); - columnDefs.add(columnDef); - } - - if (!queryMode && !columnDefs.isEmpty()) { - // create AddColumnsClause - AddColumnsClause addColumnsClause = new AddColumnsClause(columnDefs, null, null); - addColumnsClause.analyze(null); - - // index id -> index schema - Map> indexSchemaMap = new HashMap<>(); - // index id -> index col_unique_id supplier - Map colUniqueIdSupplierMap = new HashMap<>(); - for (Map.Entry> entry : olapTable.getIndexIdToSchema(true).entrySet()) { - indexSchemaMap.put(entry.getKey(), new LinkedList<>(entry.getValue())); - IntSupplier colUniqueIdSupplier = null; - if 
(olapTable.getEnableLightSchemaChange()) { - colUniqueIdSupplier = new IntSupplier() { - public int pendingMaxColUniqueId = olapTable - .getIndexMetaByIndexId(entry.getKey()).getMaxColUniqueId(); - - @Override - public int getAsInt() { - pendingMaxColUniqueId++; - return pendingMaxColUniqueId; - } - }; - } - colUniqueIdSupplierMap.put(entry.getKey(), colUniqueIdSupplier); - } - // 4. call schame change function, only for dynamic table feature. - SchemaChangeHandler schemaChangeHandler = new SchemaChangeHandler(); - - boolean lightSchemaChange = schemaChangeHandler.processAddColumns( - addColumnsClause, olapTable, indexSchemaMap, true, colUniqueIdSupplierMap); - if (lightSchemaChange) { - // for schema change add column optimize, direct modify table meta. - List newIndexes = olapTable.getCopiedIndexes(); - long jobId = Env.getCurrentEnv().getNextId(); - Env.getCurrentEnv().getSchemaChangeHandler().modifyTableLightSchemaChange( - "", - db, olapTable, indexSchemaMap, newIndexes, null, false, jobId, false); - } else { - throw new MetaNotFoundException("table_id " - + request.getTableId() + " cannot light schema change through rpc."); - } - } - - // 5. 
build all columns - for (Column column : olapTable.getBaseSchema()) { - allColumns.add(column.toThrift()); - } - schemaVersion = olapTable.getBaseSchemaVersion(); - } catch (Exception e) { - LOG.warn("got exception add columns: ", e); - status.setStatusCode(TStatusCode.INTERNAL_ERROR); - status.addToErrorMsgs(e.getMessage()); - } finally { - olapTable.writeUnlock(); - } - } catch (MetaNotFoundException e) { - status.setStatusCode(TStatusCode.NOT_FOUND); - status.addToErrorMsgs(e.getMessage()); - } catch (UserException e) { - status.setStatusCode(TStatusCode.INVALID_ARGUMENT); - status.addToErrorMsgs(e.getMessage()); - } catch (Exception e) { - LOG.warn("got exception add columns: ", e); - status.setStatusCode(TStatusCode.INTERNAL_ERROR); - status.addToErrorMsgs(e.getMessage()); - } - - TAddColumnsResult result = new TAddColumnsResult(); - result.setStatus(status); - result.setTableId(request.getTableId()); - result.setAllColumns(allColumns); - result.setSchemaVersion(schemaVersion); - LOG.debug("result: {}", result); - return result; - } - @LogException @Override public TGetTablesResult getTableNames(TGetTablesParams params) throws TException { diff --git a/fe/fe-core/src/main/jflex/sql_scanner.flex b/fe/fe-core/src/main/jflex/sql_scanner.flex index 23fb26ef47..7ad9845701 100644 --- a/fe/fe-core/src/main/jflex/sql_scanner.flex +++ b/fe/fe-core/src/main/jflex/sql_scanner.flex @@ -292,6 +292,7 @@ import org.apache.doris.qe.SqlModeHelper; keywordMap.put("join", new Integer(SqlParserSymbols.KW_JOIN)); keywordMap.put("json", new Integer(SqlParserSymbols.KW_JSON)); keywordMap.put("jsonb", new Integer(SqlParserSymbols.KW_JSONB)); + keywordMap.put("variant", new Integer(SqlParserSymbols.KW_VARIANT)); keywordMap.put("key", new Integer(SqlParserSymbols.KW_KEY)); keywordMap.put("keys", new Integer(SqlParserSymbols.KW_KEYS)); keywordMap.put("kill", new Integer(SqlParserSymbols.KW_KILL)); diff --git a/gensrc/proto/internal_service.proto b/gensrc/proto/internal_service.proto 
index 2a9260c450..529977f43d 100644 --- a/gensrc/proto/internal_service.proto +++ b/gensrc/proto/internal_service.proto @@ -589,6 +589,7 @@ message PTabletWriteSlaveRequest { message IndexSize { required int64 indexId = 1; required int64 size = 2; + optional string suffix_path = 3; }; message IndexSizeMap{ diff --git a/gensrc/proto/olap_file.proto b/gensrc/proto/olap_file.proto index 68a792623a..0f24f6b8d2 100644 --- a/gensrc/proto/olap_file.proto +++ b/gensrc/proto/olap_file.proto @@ -223,6 +223,7 @@ message TabletIndexPB { optional IndexType index_type = 3; repeated int32 col_unique_id = 4; map properties = 5; + optional string index_suffix_name = 6; } enum SortType { diff --git a/gensrc/proto/segment_v2.proto b/gensrc/proto/segment_v2.proto index c71bedd6d9..4e4ea560d5 100644 --- a/gensrc/proto/segment_v2.proto +++ b/gensrc/proto/segment_v2.proto @@ -183,7 +183,7 @@ message ColumnMetaPB { // required by array/struct/map reader to create child reader. optional uint64 num_rows = 11; repeated string children_column_names = 12; - + // persist info for PathInData that represents path in document, e.g. JSON. 
optional ColumnPathInfo column_path_info = 13; diff --git a/gensrc/thrift/FrontendService.thrift b/gensrc/thrift/FrontendService.thrift index e94526b4f1..238ca9018f 100644 --- a/gensrc/thrift/FrontendService.thrift +++ b/gensrc/thrift/FrontendService.thrift @@ -905,23 +905,6 @@ struct TFetchSchemaTableDataResult { 2: optional list data_batch; } -// Only support base table add columns -struct TAddColumnsRequest { - 1: optional i64 table_id - 2: optional list addColumns - 3: optional string table_name - 4: optional string db_name - 5: optional bool allow_type_conflict -} - -// Only support base table add columns -struct TAddColumnsResult { - 1: optional Status.TStatus status - 2: optional i64 table_id - 3: optional list allColumns - 4: optional i32 schema_version -} - struct TMySqlLoadAcquireTokenResult { 1: optional Status.TStatus status 2: optional string token @@ -1354,8 +1337,6 @@ service FrontendService { TFrontendPingFrontendResult ping(1: TFrontendPingFrontendRequest request) - TAddColumnsResult addColumns(1: TAddColumnsRequest request) - TInitExternalCtlMetaResult initExternalCtlMeta(1: TInitExternalCtlMetaRequest request) TFetchSchemaTableDataResult fetchSchemaTableData(1: TFetchSchemaTableDataRequest request) diff --git a/regression-test/data/variant_github_events_p0/affinityByIssuesAndPRs1.out b/regression-test/data/variant_github_events_p0/affinityByIssuesAndPRs1.out new file mode 100644 index 0000000000..4e3b7ce7d5 --- /dev/null +++ b/regression-test/data/variant_github_events_p0/affinityByIssuesAndPRs1.out @@ -0,0 +1,11 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !affinityByIssuesAndPRs1 -- +apache/spark 3 3 +rspec/rspec-core 2 2 +golden-warning/giraffedraft-server 15 1 +rspec/rspec-support 1 1 +rspec/rspec-rails 1 1 +rspec/rspec-mocks 1 1 +rspec/rspec-expectations 1 1 +rspec/rspec-dev 1 1 + diff --git a/regression-test/data/variant_github_events_p0/affinityByIssuesAndPRs2.out b/regression-test/data/variant_github_events_p0/affinityByIssuesAndPRs2.out new file mode 100644 index 0000000000..d825c03556 --- /dev/null +++ b/regression-test/data/variant_github_events_p0/affinityByIssuesAndPRs2.out @@ -0,0 +1,6 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !affinityByIssuesAndPRs2 -- +No-CQRT/GooGuns 44 1 +ivolunteerph/ivolunteerph 3 1 +Tribler/tribler 1 1 + diff --git a/regression-test/data/variant_github_events_p0/authorsWithTheMostPushes.out b/regression-test/data/variant_github_events_p0/authorsWithTheMostPushes.out new file mode 100644 index 0000000000..70f693578c --- /dev/null +++ b/regression-test/data/variant_github_events_p0/authorsWithTheMostPushes.out @@ -0,0 +1,53 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !authorsWithTheMostPushes -- +github-actions[bot] 1428 806 +seryozha1989 765 12 +KenanSulayman 319 1 +LombiqBot 270 5 +freecall2019 225 1 +hotspotlab 220 1 +hotspot2023 218 1 +renovate[bot] 177 106 +gyuho 170 23 +mirror-updates 168 4 +proxylist-to-bot 164 1 +cageyjames 119 1 +qdm 117 1 +milesholt 107 1 +greatfire 96 2 +Hall-1910 95 1 +apaolacci 95 1 +dependabot[bot] 83 43 +729300gahycus 79 1 +pull[bot] 69 69 +himobi 65 1 +patrick-hudson 61 1 +naijaping 55 1 +fake-name 54 1 +supervpnops 54 2 +brokjad 51 1 +tui590285 49 3 +renovate-bot 47 46 +audreywatters 45 19 +sanliuyi201 45 1 +predictcrypto 44 1 +cm-gerrit 41 24 +B74LABgit 40 3 +PacoReinaCampo 37 37 +vpnsuperapp 37 1 +BlackSnowDot 36 2 +westurner 34 5 +Tower-KevinLi 33 1 +billybobza 32 3 +cato- 32 1 +codertradergambler 32 1 +ivy-root 32 1 +robocdnjs 32 1 +Chourouk-Zioud 31 1 +Warrenty 31 2 +brianmaierjr 31 1 +mhutchinson-witness 31 2 +andrew-nault 30 1 +mouyong 30 2 +megantmcginley 29 1 + diff --git a/regression-test/data/variant_github_events_p0/countingStar1.out b/regression-test/data/variant_github_events_p0/countingStar1.out new file mode 100644 index 0000000000..633dfab5e1 --- /dev/null +++ b/regression-test/data/variant_github_events_p0/countingStar1.out @@ -0,0 +1,4 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !countingStar1 -- +4021 + diff --git a/regression-test/data/variant_github_events_p0/countingStar2.out b/regression-test/data/variant_github_events_p0/countingStar2.out new file mode 100644 index 0000000000..7658cd6fab --- /dev/null +++ b/regression-test/data/variant_github_events_p0/countingStar2.out @@ -0,0 +1,4 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !countingStar2 -- +started 4021 + diff --git a/regression-test/data/variant_github_events_p0/countingStar3.out b/regression-test/data/variant_github_events_p0/countingStar3.out new file mode 100644 index 0000000000..516eb9b734 --- /dev/null +++ b/regression-test/data/variant_github_events_p0/countingStar3.out @@ -0,0 +1,4 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !countingStar3 -- +42 + diff --git a/regression-test/data/variant_github_events_p0/distributionOfRepositoriesByStarCount.out b/regression-test/data/variant_github_events_p0/distributionOfRepositoriesByStarCount.out new file mode 100644 index 0000000000..ee8916c3a3 --- /dev/null +++ b/regression-test/data/variant_github_events_p0/distributionOfRepositoriesByStarCount.out @@ -0,0 +1,5 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !distributionOfRepositoriesByStarCount -- +1.0 3375 +10.0 6 + diff --git a/regression-test/data/variant_github_events_p0/githubRoulette.out b/regression-test/data/variant_github_events_p0/githubRoulette.out new file mode 100644 index 0000000000..6db7338370 --- /dev/null +++ b/regression-test/data/variant_github_events_p0/githubRoulette.out @@ -0,0 +1,53 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !githubRoulette -- +LinuxStandardBase/lsb +inf0rmer/blanket +parrt/cs652 +Pathgather/please-wait +jtsternberg/Shortcode_Button +EnterpriseQualityCoding/FizzBuzzEnterpriseEdition +VictiniX888/Illegal-Mod-Sites +robfig/cron +brendangregg/FlameGraph +h5bp/Front-end-Developer-Interview-Questions +pebble/pebblejs +EU-OSHA/osha-website +danielstjules/Stringy +scottjehl/picturefill +letsencrypt/heroku-acme +gaspaio/gearmanui +servo/servo +thlorenz/proxyquire +xpac27/LittleWorld +jpf/the-gannet +drrb/java-rust-example +jcodec/jcodec +mbadolato/iTerm2-Color-Schemes +X1011/git-directory-deploy +bestwnh/IGLDropDownMenu +alvarotrigo/fullPage.js +memsql/memsql-loader +martijnwalraven/meteor-ios +Microsoft/dotnet +cowboy/dotfiles +larrycai/pirate +greatfire/wiki +greatfire/wiki +rapid7/metasploit-framework +tobegit3hub/pirate +evido/wotreplay-parser +Phrogz/SLAXML +beautify-web/js-beautify +django/django +juhl/physicsRus +zingchart/ZingChart-jQuery +tomchristie/django-rest-framework +thephpleague/oauth2-server +cloudflare/golibs +iojs/io.js +golang/go +cppformat/cppformat +google/end-to-end +progrium/duplex +bwlewis/rthreejs + diff --git a/regression-test/data/variant_github_events_p0/howHasTheListOfTopRepositoriesChangedOverTheYears1.out b/regression-test/data/variant_github_events_p0/howHasTheListOfTopRepositoriesChangedOverTheYears1.out new file mode 100644 index 0000000000..a75f6cd87d --- /dev/null +++ b/regression-test/data/variant_github_events_p0/howHasTheListOfTopRepositoriesChangedOverTheYears1.out @@ -0,0 +1,53 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !howHasTheListOfTopRepositoriesChangedOverTheYears1 -- +wasabeef/awesome-android-ui 35 +prakhar1989/awesome-courses 31 +cachethq/Cachet 17 +Pathgather/please-wait 13 +begriffs/postgrest 12 +cssdream/cssgrace 12 +Netflix/ice 9 +gorhill/uBlock 9 +kragniz/json-sempai 9 +wasabeef/awesome-android-libraries 9 +Qihoo360/phptrace 8 +auchenberg/chrome-devtools-app 8 +papers-we-love/papers-we-love 8 +vinta/awesome-python 8 +goagent/goagent 7 +kbandla/APTnotes 7 +lexrus/VPNOn 7 +zhihu/kids 7 +alvarotrigo/fullPage.js 6 +dockerboard/dockerboard 6 +h5bp/Front-end-Developer-Interview-Questions 6 +inf0rmer/blanket 6 +isohuntto/openbay 6 +livid/v2ex 6 +rails/rails-perftest 6 +Reactive-Extensions/RxJS 5 +d235j/360Controller 5 +fcambus/nginx-resources 5 +nemoTyrant/manong 5 +Anchor89/GithubHub 4 +FelisCatus/SwitchyOmega 4 +atom/atom 4 +docker/fig 4 +facebook/react 4 +flarum/core 4 +google/end-to-end 4 +greatfire/wiki 4 +imgix/imgix-emacs 4 +iojs/io.js 4 +josh/cafe-js 4 +leanote/leanote 4 +neilj/Squire 4 +orangeduck/libCello 4 +spf13/hugo 4 +square/PonyDebugger 4 +stackia/SteamFriendsManager 4 +staltz/cycle 4 +tobiasahlin/SpinKit 4 +twbs/bootstrap 4 +zingchart/ZingChart-jQuery 4 + diff --git a/regression-test/data/variant_github_events_p0/howHasTheListOfTopRepositoriesChangedOverTheYears2.out b/regression-test/data/variant_github_events_p0/howHasTheListOfTopRepositoriesChangedOverTheYears2.out new file mode 100644 index 0000000000..a510ce81ba --- /dev/null +++ b/regression-test/data/variant_github_events_p0/howHasTheListOfTopRepositoriesChangedOverTheYears2.out @@ -0,0 +1,53 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !howHasTheListOfTopRepositoriesChangedOverTheYears2 -- +wasabeef/awesome-android-ui 35 +prakhar1989/awesome-courses 31 +cachethq/Cachet 17 +Pathgather/please-wait 13 +begriffs/postgrest 12 +cssdream/cssgrace 12 +Netflix/ice 9 +gorhill/uBlock 9 +kragniz/json-sempai 9 +wasabeef/awesome-android-libraries 9 +Qihoo360/phptrace 8 +auchenberg/chrome-devtools-app 8 +papers-we-love/papers-we-love 8 +vinta/awesome-python 8 +goagent/goagent 7 +kbandla/APTnotes 7 +lexrus/VPNOn 7 +zhihu/kids 7 +alvarotrigo/fullPage.js 6 +dockerboard/dockerboard 6 +h5bp/Front-end-Developer-Interview-Questions 6 +inf0rmer/blanket 6 +isohuntto/openbay 6 +livid/v2ex 6 +rails/rails-perftest 6 +Reactive-Extensions/RxJS 5 +d235j/360Controller 5 +fcambus/nginx-resources 5 +nemoTyrant/manong 5 +Anchor89/GithubHub 4 +FelisCatus/SwitchyOmega 4 +atom/atom 4 +docker/fig 4 +facebook/react 4 +flarum/core 4 +google/end-to-end 4 +greatfire/wiki 4 +imgix/imgix-emacs 4 +iojs/io.js 4 +josh/cafe-js 4 +leanote/leanote 4 +neilj/Squire 4 +orangeduck/libCello 4 +spf13/hugo 4 +square/PonyDebugger 4 +stackia/SteamFriendsManager 4 +staltz/cycle 4 +tobiasahlin/SpinKit 4 +twbs/bootstrap 4 +zingchart/ZingChart-jQuery 4 + diff --git a/regression-test/data/variant_github_events_p0/howHasTheListOfTopRepositoriesChangedOverTheYears3.out b/regression-test/data/variant_github_events_p0/howHasTheListOfTopRepositoriesChangedOverTheYears3.out new file mode 100644 index 0000000000..8d2c912415 --- /dev/null +++ b/regression-test/data/variant_github_events_p0/howHasTheListOfTopRepositoriesChangedOverTheYears3.out @@ -0,0 +1,53 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !howHasTheListOfTopRepositoriesChangedOverTheYears3 -- +zzarcon/focusable 1 +zwaldowski/BlocksKit 1 +zurb/foundation 1 +zulfajuniadi/riverlevel 1 +zuha/Zuha 1 +zuffdaddy/dynalamp 1 +zorro3/ConfigArgParse 1 +zoomhub/zoomhub 1 +zokis/Python--Faster-Way 1 +ziyasal/node-procexss 1 +zhenchen/Experiment-for-secompax 1 +zeromq/jeromq 1 +zendframework/ZendSkeletonApplication 1 +zenazn/goji 1 +zedapp/zed 1 +zdavatz/spreadsheet 1 +zcweng/ToggleButton 1 +zackkitzmiller/gofish 1 +zachwill/flask-engine 1 +z411/trackma 1 +yyuu/pyenv-installer 1 +yxsicd/yxsimg 1 +ywdarklord/Go-Example 1 +yorikvanhavre/FreeCAD-library 1 +yogiben/meteor-bootstrap 1 +yob/pdf-reader 1 +ymx/RefExplorer 1 +yeoman/generator-angular 1 +yangqi/Htmldom 1 +yahoo/android-range-seek-bar 1 +y-ken/fluent-logger-mruby 1 +xxv/android-lifecycle 1 +xvoland/Extract 1 +xueruini/thuthesis 1 +xsacha/Sachesi 1 +xpac27/LittleWorld 1 +xobs/fernly 1 +xjzhou/500lines 1 +xicilion/fibjs 1 +xiaobozi/youku-lixian 1 +xiaoai/android-waterfall-demo 1 +xfeng/MultiHttpRequest 1 +xero-gateway/xero_gateway 1 +xenith-studios/ataxia 1 +xebia/jackson-lombok 1 +xcltapestry/XCL-Charts 1 +xat/chromecast-player 1 +wymsee/cordova-imagePicker 1 +wycc/arduino-wukong 1 +wycats/handlebars.js 1 + diff --git a/regression-test/data/variant_github_events_p0/howHasTheListOfTopRepositoriesChangedOverTheYears4.out b/regression-test/data/variant_github_events_p0/howHasTheListOfTopRepositoriesChangedOverTheYears4.out new file mode 100644 index 0000000000..c23f409ba5 --- /dev/null +++ b/regression-test/data/variant_github_events_p0/howHasTheListOfTopRepositoriesChangedOverTheYears4.out @@ -0,0 +1,53 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !howHasTheListOfTopRepositoriesChangedOverTheYears4 -- +wasabeef/awesome-android-ui 35 +prakhar1989/awesome-courses 31 +cachethq/Cachet 17 +Pathgather/please-wait 13 +begriffs/postgrest 12 +cssdream/cssgrace 12 +Netflix/ice 9 +gorhill/uBlock 9 +kragniz/json-sempai 9 +wasabeef/awesome-android-libraries 9 +Qihoo360/phptrace 8 +auchenberg/chrome-devtools-app 8 +papers-we-love/papers-we-love 8 +vinta/awesome-python 8 +goagent/goagent 7 +kbandla/APTnotes 7 +lexrus/VPNOn 7 +zhihu/kids 7 +alvarotrigo/fullPage.js 6 +dockerboard/dockerboard 6 +h5bp/Front-end-Developer-Interview-Questions 6 +inf0rmer/blanket 6 +isohuntto/openbay 6 +livid/v2ex 6 +rails/rails-perftest 6 +Reactive-Extensions/RxJS 5 +d235j/360Controller 5 +fcambus/nginx-resources 5 +nemoTyrant/manong 5 +Anchor89/GithubHub 4 +FelisCatus/SwitchyOmega 4 +atom/atom 4 +docker/fig 4 +facebook/react 4 +flarum/core 4 +google/end-to-end 4 +greatfire/wiki 4 +imgix/imgix-emacs 4 +iojs/io.js 4 +josh/cafe-js 4 +leanote/leanote 4 +neilj/Squire 4 +orangeduck/libCello 4 +spf13/hugo 4 +square/PonyDebugger 4 +stackia/SteamFriendsManager 4 +staltz/cycle 4 +tobiasahlin/SpinKit 4 +twbs/bootstrap 4 +zingchart/ZingChart-jQuery 4 + diff --git a/regression-test/data/variant_github_events_p0/howHasTheListOfTopRepositoriesChangedOverTheYears5.out b/regression-test/data/variant_github_events_p0/howHasTheListOfTopRepositoriesChangedOverTheYears5.out new file mode 100644 index 0000000000..523224a060 --- /dev/null +++ b/regression-test/data/variant_github_events_p0/howHasTheListOfTopRepositoriesChangedOverTheYears5.out @@ -0,0 +1,53 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !howHasTheListOfTopRepositoriesChangedOverTheYears5 -- +wasabeef/awesome-android-ui 35 +prakhar1989/awesome-courses 31 +cachethq/Cachet 17 +Pathgather/please-wait 13 +begriffs/postgrest 12 +cssdream/cssgrace 12 +Netflix/ice 9 +gorhill/uBlock 9 +kragniz/json-sempai 9 +wasabeef/awesome-android-libraries 9 +Qihoo360/phptrace 8 +auchenberg/chrome-devtools-app 8 +papers-we-love/papers-we-love 8 +vinta/awesome-python 8 +goagent/goagent 7 +kbandla/APTnotes 7 +lexrus/VPNOn 7 +zhihu/kids 7 +alvarotrigo/fullPage.js 6 +dockerboard/dockerboard 6 +h5bp/Front-end-Developer-Interview-Questions 6 +inf0rmer/blanket 6 +isohuntto/openbay 6 +livid/v2ex 6 +rails/rails-perftest 6 +Reactive-Extensions/RxJS 5 +d235j/360Controller 5 +fcambus/nginx-resources 5 +nemoTyrant/manong 5 +Anchor89/GithubHub 4 +FelisCatus/SwitchyOmega 4 +atom/atom 4 +docker/fig 4 +facebook/react 4 +flarum/core 4 +google/end-to-end 4 +greatfire/wiki 4 +imgix/imgix-emacs 4 +iojs/io.js 4 +josh/cafe-js 4 +leanote/leanote 4 +neilj/Squire 4 +orangeduck/libCello 4 +spf13/hugo 4 +square/PonyDebugger 4 +stackia/SteamFriendsManager 4 +staltz/cycle 4 +tobiasahlin/SpinKit 4 +twbs/bootstrap 4 +zingchart/ZingChart-jQuery 4 + diff --git a/regression-test/data/variant_github_events_p0/howHasTheListOfTopRepositoriesChangedOverTheYears6.out b/regression-test/data/variant_github_events_p0/howHasTheListOfTopRepositoriesChangedOverTheYears6.out new file mode 100644 index 0000000000..3c6e5bbe07 --- /dev/null +++ b/regression-test/data/variant_github_events_p0/howHasTheListOfTopRepositoriesChangedOverTheYears6.out @@ -0,0 +1,53 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !howHasTheListOfTopRepositoriesChangedOverTheYears6 -- +wasabeef/awesome-android-ui 35 +prakhar1989/awesome-courses 31 +cachethq/Cachet 17 +Pathgather/please-wait 13 +begriffs/postgrest 12 +cssdream/cssgrace 12 +Netflix/ice 9 +gorhill/uBlock 9 +kragniz/json-sempai 9 +wasabeef/awesome-android-libraries 9 +Qihoo360/phptrace 8 +auchenberg/chrome-devtools-app 8 +papers-we-love/papers-we-love 8 +vinta/awesome-python 8 +goagent/goagent 7 +kbandla/APTnotes 7 +lexrus/VPNOn 7 +zhihu/kids 7 +alvarotrigo/fullPage.js 6 +dockerboard/dockerboard 6 +h5bp/Front-end-Developer-Interview-Questions 6 +inf0rmer/blanket 6 +isohuntto/openbay 6 +livid/v2ex 6 +rails/rails-perftest 6 +Reactive-Extensions/RxJS 5 +d235j/360Controller 5 +fcambus/nginx-resources 5 +nemoTyrant/manong 5 +Anchor89/GithubHub 4 +FelisCatus/SwitchyOmega 4 +atom/atom 4 +docker/fig 4 +facebook/react 4 +flarum/core 4 +google/end-to-end 4 +greatfire/wiki 4 +imgix/imgix-emacs 4 +iojs/io.js 4 +josh/cafe-js 4 +leanote/leanote 4 +neilj/Squire 4 +orangeduck/libCello 4 +spf13/hugo 4 +square/PonyDebugger 4 +stackia/SteamFriendsManager 4 +staltz/cycle 4 +tobiasahlin/SpinKit 4 +twbs/bootstrap 4 +zingchart/ZingChart-jQuery 4 + diff --git a/regression-test/data/variant_github_events_p0/howHasTheTotalNumberOfStarsChangedOverTime.out b/regression-test/data/variant_github_events_p0/howHasTheTotalNumberOfStarsChangedOverTime.out new file mode 100644 index 0000000000..3e777719e3 --- /dev/null +++ b/regression-test/data/variant_github_events_p0/howHasTheTotalNumberOfStarsChangedOverTime.out @@ -0,0 +1,5 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !howHasTheTotalNumberOfStarsChangedOverTime -- +2015 2725 +2022 1296 + diff --git a/regression-test/data/variant_github_events_p0/issuesWithTheMostComments1.out b/regression-test/data/variant_github_events_p0/issuesWithTheMostComments1.out new file mode 100644 index 0000000000..b1db3f8a65 --- /dev/null +++ b/regression-test/data/variant_github_events_p0/issuesWithTheMostComments1.out @@ -0,0 +1,4 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !issuesWithTheMostComments1 -- +5314 + diff --git a/regression-test/data/variant_github_events_p0/issuesWithTheMostComments2.out b/regression-test/data/variant_github_events_p0/issuesWithTheMostComments2.out new file mode 100644 index 0000000000..c50a162b2d --- /dev/null +++ b/regression-test/data/variant_github_events_p0/issuesWithTheMostComments2.out @@ -0,0 +1,53 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !issuesWithTheMostComments2 -- +GovAlta/ui-components 69 +TTMTT/iCL0udin 34 +apache/spark 31 +CleverRaven/Cataclysm-DDA 29 +cachethq/Cachet 28 +frogsbreath-games/eucre 25 +HabitRPG/habitrpg 23 +tgstation/-tg-station 22 +rust-lang/rust 21 +Youssef1313/samples 20 +backdrop/backdrop-issues 18 +gratipay/inside.gratipay.com 18 +saltstack/salt 17 +lumien231/Custom-Main-Menu 16 +MetaMask/eth-phishing-detect 15 +numenta/nupic.core 15 +Homebrew/homebrew 13 +badrsony/icloudin-support- 13 +Expensify/App 12 +Sanne/testcontainers-java 11 +atom-community/autocomplete-plus 11 +diydrones/ardupilot 11 +kguil/Marvin-Roadmap 11 +kyma-project/kyma 11 +libantioch/antioch 11 +Baystation12/Baystation12 10 +hrydgard/ppsspp 10 +rQAQr/rss 10 +sikozheng/rshb 10 +tipfortip/issues 10 +Mindwerks/wildmidi 9 +NeuroVault/NeuroVault 9 +THE-ESCAPIST/RSSHub 9 +WhisperSystems/TextSecure 9 +XLabs/Xamarin-Forms-Labs 9 +aws/eks-distro 9 +disco-trooper/weather-app 9 +docker-library/docs 9 
+expressjs/compression 9 +flutter/flutter 9 +isaacg1/pyth 9 +jscs-dev/node-jscs 9 +orkestral/venom 9 +udondan/jsii 9 +4Nanai/Abot 8 +Blockchain-Dev-Web/hardhat-erc721 8 +DoYana/myrss 8 +Joomla-Bible-Study/Joomla-Bible-Study 8 +JuliaLang/julia 8 +P3TERX/RSSHub 8 + diff --git a/regression-test/data/variant_github_events_p0/issuesWithTheMostComments3.out b/regression-test/data/variant_github_events_p0/issuesWithTheMostComments3.out new file mode 100644 index 0000000000..9cb3bda773 --- /dev/null +++ b/regression-test/data/variant_github_events_p0/issuesWithTheMostComments3.out @@ -0,0 +1,53 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !issuesWithTheMostComments3 -- +GovAlta/ui-components 69 69 1.0 +TTMTT/iCL0udin 34 1 34.0 +apache/spark 31 12 2.58 +CleverRaven/Cataclysm-DDA 29 13 2.23 +cachethq/Cachet 28 11 2.55 +frogsbreath-games/eucre 25 25 1.0 +HabitRPG/habitrpg 23 10 2.3 +tgstation/-tg-station 22 10 2.2 +rust-lang/rust 21 13 1.62 +Youssef1313/samples 20 20 1.0 +backdrop/backdrop-issues 18 7 2.57 +gratipay/inside.gratipay.com 18 4 4.5 +saltstack/salt 17 7 2.43 +lumien231/Custom-Main-Menu 16 1 16.0 +MetaMask/eth-phishing-detect 15 15 1.0 +numenta/nupic.core 15 6 2.5 +Homebrew/homebrew 13 6 2.17 +badrsony/icloudin-support- 13 2 6.5 +Expensify/App 12 9 1.33 +Sanne/testcontainers-java 11 11 1.0 +atom-community/autocomplete-plus 11 1 11.0 +diydrones/ardupilot 11 8 1.38 +kguil/Marvin-Roadmap 11 2 5.5 +kyma-project/kyma 11 10 1.1 +libantioch/antioch 11 2 5.5 +Baystation12/Baystation12 10 5 2.0 +hrydgard/ppsspp 10 5 2.0 +rQAQr/rss 10 8 1.25 +sikozheng/rshb 10 6 1.67 +tipfortip/issues 10 1 10.0 +Mindwerks/wildmidi 9 9 1.0 +NeuroVault/NeuroVault 9 1 9.0 +THE-ESCAPIST/RSSHub 9 7 1.29 +WhisperSystems/TextSecure 9 8 1.12 +XLabs/Xamarin-Forms-Labs 9 6 1.5 +aws/eks-distro 9 1 9.0 +disco-trooper/weather-app 9 9 1.0 +docker-library/docs 9 4 2.25 +expressjs/compression 9 1 9.0 +flutter/flutter 9 9 1.0 +isaacg1/pyth 9 3 3.0 
+jscs-dev/node-jscs 9 6 1.5 +orkestral/venom 9 9 1.0 +udondan/jsii 9 9 1.0 +4Nanai/Abot 8 5 1.6 +Blockchain-Dev-Web/hardhat-erc721 8 1 8.0 +DoYana/myrss 8 6 1.33 +Joomla-Bible-Study/Joomla-Bible-Study 8 1 8.0 +JuliaLang/julia 8 7 1.14 +P3TERX/RSSHub 8 6 1.33 + diff --git a/regression-test/data/variant_github_events_p0/issuesWithTheMostComments4.out b/regression-test/data/variant_github_events_p0/issuesWithTheMostComments4.out new file mode 100644 index 0000000000..737f2e33a4 --- /dev/null +++ b/regression-test/data/variant_github_events_p0/issuesWithTheMostComments4.out @@ -0,0 +1,53 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !issuesWithTheMostComments4 -- +TTMTT/iCL0udin 1 34 +lumien231/Custom-Main-Menu 7 16 +cachethq/Cachet 173 11 +atom-community/autocomplete-plus 185 11 +saltstack/salt 19253 11 +tipfortip/issues 133 10 +badrsony/icloudin-support- 8 9 +expressjs/compression 25 9 +NeuroVault/NeuroVault 90 9 +gratipay/inside.gratipay.com 93 9 +aws/eks-distro 1398 9 +CleverRaven/Cataclysm-DDA 10643 9 +honeinc/emit-bindings 7 8 +libantioch/antioch 72 8 +alexgrist/ServerGuard 118 8 +Blockchain-Dev-Web/hardhat-erc721 279 8 +kguil/Marvin-Roadmap 362 8 +Joomla-Bible-Study/Joomla-Bible-Study 403 8 +isaacg1/pyth 10 7 +SpongePowered/SpongeAPI 272 7 +backdrop/backdrop-issues 521 7 +apache/spark 3864 7 +apache/spark 3865 7 +YungSang/boot2docker-vagrant-box 18 6 +iChun/Tabula 23 6 +ev3dev/ev3dev.github.io 37 6 +gratipay/inside.gratipay.com 86 6 +scientist-softserv/adventist-dl 117 6 +openfl/lime 301 6 +MinecraftForge/FML 581 6 +stedolan/jq 659 6 +carltonwhitehead/coner 2 5 +box/leche 5 5 +aatxe/irc 11 5 +torch/trepl 12 5 +sfu-natlang/lensingwikipedia 127 5 +numenta/nupic.core 295 5 +AtomLinter/Linter 316 5 +rwaldron/johnny-five 524 5 +blueboxgroup/ursula 670 5 +ankidroid/Anki-Android 680 5 +notsecure/uTox 718 5 +sass/libsass 790 5 +facebook/react 953 5 +nylira/prism-break 1212 5 +numenta/nupic 1694 5 +theCrag/website 
1764 5 +HabitRPG/habitrpg 4458 5 +tgstation/-tg-station 6718 5 +hrydgard/ppsspp 7245 5 + diff --git a/regression-test/data/variant_github_events_p0/issuesWithTheMostComments5.out b/regression-test/data/variant_github_events_p0/issuesWithTheMostComments5.out new file mode 100644 index 0000000000..a4a6da943a --- /dev/null +++ b/regression-test/data/variant_github_events_p0/issuesWithTheMostComments5.out @@ -0,0 +1,53 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !issuesWithTheMostComments5 -- +atom-community/autocomplete-plus 185 11 +cachethq/Cachet 173 11 +saltstack/salt 19253 11 +tipfortip/issues 133 10 +CleverRaven/Cataclysm-DDA 10643 9 +NeuroVault/NeuroVault 90 9 +aws/eks-distro 1398 9 +expressjs/compression 25 9 +gratipay/inside.gratipay.com 93 9 +Blockchain-Dev-Web/hardhat-erc721 279 8 +Joomla-Bible-Study/Joomla-Bible-Study 403 8 +alexgrist/ServerGuard 118 8 +kguil/Marvin-Roadmap 362 8 +libantioch/antioch 72 8 +SpongePowered/SpongeAPI 272 7 +apache/spark 3864 7 +apache/spark 3865 7 +backdrop/backdrop-issues 521 7 +MinecraftForge/FML 581 6 +YungSang/boot2docker-vagrant-box 18 6 +ev3dev/ev3dev.github.io 37 6 +gratipay/inside.gratipay.com 86 6 +iChun/Tabula 23 6 +openfl/lime 301 6 +scientist-softserv/adventist-dl 117 6 +stedolan/jq 659 6 +AtomLinter/Linter 316 5 +HabitRPG/habitrpg 4458 5 +Homebrew/homebrew 35404 5 +aatxe/irc 11 5 +ankidroid/Anki-Android 680 5 +blueboxgroup/ursula 670 5 +facebook/react 953 5 +hrydgard/ppsspp 7245 5 +notsecure/uTox 718 5 +numenta/nupic 1694 5 +numenta/nupic.core 295 5 +nylira/prism-break 1212 5 +rwaldron/johnny-five 524 5 +sass/libsass 790 5 +sfu-natlang/lensingwikipedia 127 5 +tgstation/-tg-station 6718 5 +theCrag/website 1764 5 +torch/trepl 12 5 +Expensify/App 12400 4 +Homebrew/homebrew 35194 4 +TelescopeJS/Telescope 666 4 +Unitech/PM2 913 4 +apache/spark 3237 4 +apache/spark 3855 4 + diff --git a/regression-test/data/variant_github_events_p0/issuesWithTheMostComments6.out 
b/regression-test/data/variant_github_events_p0/issuesWithTheMostComments6.out new file mode 100644 index 0000000000..57af352119 --- /dev/null +++ b/regression-test/data/variant_github_events_p0/issuesWithTheMostComments6.out @@ -0,0 +1,15 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !issuesWithTheMostComments6 -- +cachethq/Cachet 173 11 4 +SpongePowered/SpongeAPI 272 7 4 +MinecraftForge/FML 581 6 4 +HabitRPG/habitrpg 4458 5 4 +Homebrew/homebrew 35404 5 4 +ankidroid/Anki-Android 680 5 4 +tgstation/-tg-station 6718 5 4 +Unitech/PM2 913 4 4 +dolphin-emu/dolphin 1798 4 4 +nezhivar/nezhOS 190 4 4 +rust-lang/rust 20364 4 4 +tgstation/-tg-station 6689 4 4 + diff --git a/regression-test/data/variant_github_events_p0/issuesWithTheMostComments7.out b/regression-test/data/variant_github_events_p0/issuesWithTheMostComments7.out new file mode 100644 index 0000000000..1a3d9f653d --- /dev/null +++ b/regression-test/data/variant_github_events_p0/issuesWithTheMostComments7.out @@ -0,0 +1,53 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !issuesWithTheMostComments7 -- +mheon/checksims 28 2 +rust-lang/rust 18 1 +reubenhwk/dvd_menu_animator 9 2 +eswordthecat/vgstation13 8 3 +Homebrew/homebrew-science 7 2 +JoeForks/Cachet 7 3 +rene-scheepers/case-life 7 1 +DeEvo/chamilo-lms-utp 6 1 +christophercochran/Genesis-Visual-Hook-Guide 5 1 +ProjectCollaboTeam/Collab 4 2 +Thunderbots-5604/2014-Code 4 1 +botandrose/calagator 4 2 +jdavis/twofactorauth 4 1 +jupitersh/jupitersh.github.io 4 1 +skillrecordings/products 4 1 +vangdfang/libcutter 4 1 +NREL/OpenStudio 3 1 +archaeopterasa/synx 3 2 +att/rcloud 3 2 +hashicorp/boundary-ui 3 1 +honestbleeps/Reddit-Enhancement-Suite 3 1 +japaric/rust 3 1 +AKST/jsgen 2 1 +AnyelinaMarte/saeamt-docente 2 1 +AyaNour333/Dashboard 2 1 +Beiyongcangku/things 2 1 +Brento27/Job-applications 2 1 +ErikZalm/Marlin 2 2 +GijsTimmers/kotnetcli 2 1 +Jamesking56/Cachet 2 1 +Nbodypp/HOW_final 2 2 +PCSX2/pcsx2 2 2 +Tsavsar/CapacitorApp 2 1 +WasabiFan/ev3dev.github.io 2 1 +Whatevering/news-homepage-fm 2 1 +b0wdyy/book-reads 2 1 +batyshkaLenin/alexandr-sidorenko.me 2 1 +cdandrango3/facturas 2 1 +composer/composer 2 1 +dengxqi/jsbbs 2 1 +elikemscott/Assessment 2 1 +george1410/daily-mix-saver 2 1 +giorgiaBertacchini/MLOps-kedro-auto 2 1 +k2wlxda/kernel-msm 2 1 +kmiecik013/test2 2 1 +koajs/koa 2 2 +kripken/emscripten-fastcomp-clang 2 1 +kyrias/pkgbuilds 2 2 +livepeer/livepeer.js 2 1 +lucasjolibois54/futureworld 2 1 + diff --git a/regression-test/data/variant_github_events_p0/load.out b/regression-test/data/variant_github_events_p0/load.out new file mode 100644 index 0000000000..13ce3dfca0 --- /dev/null +++ b/regression-test/data/variant_github_events_p0/load.out @@ -0,0 +1,7 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !sql -- +\N +\N +\N +4748 + diff --git a/regression-test/data/variant_github_events_p0/mostForkedRepositories.out b/regression-test/data/variant_github_events_p0/mostForkedRepositories.out new file mode 100644 index 0000000000..f516dae125 --- /dev/null +++ b/regression-test/data/variant_github_events_p0/mostForkedRepositories.out @@ -0,0 +1,53 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !mostForkedRepositories -- +cssdream/cssgrace 5 +labenuexercicios/objetos-template 5 +lambda81/lambda-addons 5 +octocat/Spoon-Knife 5 +prakhar1989/awesome-courses 5 +udacity/frontend-nanodegree-resume 5 +TheOdinProject/css-exercises 3 +ahmadpanah/Music-instrument 3 +bitcoin/bitcoin 3 +detuks/LeagueSharp 3 +neutron-org/testnets 3 +ranerlich7/library_django 3 +twbs/bootstrap 3 +Araq/Nim 2 +BradLarson/GPUImage 2 +ColeTownsend/Balzac-for-Jekyll 2 +EvgeniiMal/HTML-builder 2 +GAWMiners/paycoin 2 +InjectionDev/LeagueSharp 2 +LarryMad/recipes 2 +Qihoo360/phptrace 2 +SublimeHaskell/SublimeHaskell 2 +TORC2137/2137-2014-roboRIO-Labview 2 +Vanna007/Free-RDP 2 +WhisperSystems/TextSecure 2 +Wynncraft/Issues 2 +adobe/brackets 2 +alexvollmer/daemon-spawn 2 +apache/spark 2 +appacademy/active_record_lite 2 +bundler/bundler 2 +clowwindy/ChinaDNS 2 +cocos2d/cocos2d-x 2 +constjs/SmartThings-Devices 2 +deadlyvipers/dojo_rules 2 +discourse/discourse 2 +django-nonrel/django 2 +django/django 2 +firstcontributions/first-contributions 2 +flarum/core 2 +informatika-19/latihan-backend-19421040-bagasdwijayanto 2 +jculvey/roboto 2 +jlord/patchwork 2 +joinpursuit/8-0-react-hooks-lab 2 +julycoding/The-Art-Of-Programming-By-July 2 +learn-co-curriculum/phase-1-practice-toy-tale 2 +lexrus/VPNOn 2 +mmistakes/so-simple-theme 2 +mrkipling/maraschino 2 +nightscout/cgm-remote-monitor 2 + diff --git a/regression-test/data/variant_github_events_p0/mostPopularCommentsOnGithub.out 
b/regression-test/data/variant_github_events_p0/mostPopularCommentsOnGithub.out new file mode 100644 index 0000000000..a1a7da60a8 --- /dev/null +++ b/regression-test/data/variant_github_events_p0/mostPopularCommentsOnGithub.out @@ -0,0 +1,53 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !mostPopularCommentsOnGithub -- +🧹 1 +😑 1 +👍 1 +???? 1 +확인 1 +코드가 변경되서 찾지 못하겠어요 1 +정확한 부분의 SQL Language 입니다. 1 +에러 유발한 값들을 같이 찍어주면 더 좋을 것 같네요. 1 +어떻게 issue owner 할당 하나요? 이 이슈를 저한테 할당하려고 합니다. 1 +생성자를 통해 chatButton등을 넘겨받는 것이 좋을 것 같습니다. 1 +매직넘버를 루프로 바꾸는 과정에서 실수로 안바꿔준 부분 같습니다 1 +넵 막 Collaborators로 등록했습니다. 이제 스스로 할당 하실 수 있습니다 :) 1 +권한 적용 완료 1 +非常感谢! 1 +選択肢がそれしかないのを強調するため 1 +跪求大侠们把这bug认领了吧。\r\n我就可以直接开始干SRS3.0了。 1 +越发觉得把按钮从顶栏抽出来非常合理\r\n不过还是算了,我也不会搞(ㅍ_ㅍ) 1 +谢谢您! 1 +请使用stable的版本。 1 +请不要使用中文路径 1 +说解决方案 1 +表示在win也存在这问题,ctrl+c不能复制调试文字。\r\nfixed 就好 1 +有具体的IP列表吗?fakeip没有自动更新的机制 1 +是 han.js 里测试特性的代码中的。 1 +改名 1 +我这边魔改了很多东西,就是同步完,清理仓库 1 +我也遇到这个问题,目前没找到原因。等发现解决了方法再告诉你啊。 1 +想重新焊接一个,我看8822cs支持不好,有没有其他可以支持的模块 1 +微信这么一变,挺伤的。还不知什么时候会正式放出接口。 1 +已经提交了一个 Pull Requests 1 +已经不再用seajs了,browserify的实时打包或许是个正确的选择。恍惚++ 1 +参考資料ありがとうございます。\n 1 +你说的难道不是过滤规则的功能吗? 1 +今年がもっと素敵な一年になりますように。 1 +乃\r\n 1 +メニューへ移動して嬉しそうな機能は全て移し終わりました。\r\n 1 +すごい量!\r\n 1 +これは、(多分)私の仕事ですね。 1 +おめでとうございます! :tada: 1 +مبروك أخي مجد 1 +إنشاء الله أخي مجد 1 +а линтер на что, это же ридми?\r\nЗаменил ссылки 1 +Спасибо. Удалил 1 +Сам нашёл, сам пофиксил - всё правильно. 1 +Готово 1 +you can see them in archery tweaks. this is something I added and committed separately. 
1 +you can increase the timeout value to allow for the scripts to run `host.idleTimeOut = 10000` 1 +yey 1 +yes, that's definitely on purpose 1 +yes, look at this: https://github.com/niXman/mingw-builds/pull/628 1 + diff --git a/regression-test/data/variant_github_events_p0/organizationsByTheNumberOfRepositories.out b/regression-test/data/variant_github_events_p0/organizationsByTheNumberOfRepositories.out new file mode 100644 index 0000000000..661a376e03 --- /dev/null +++ b/regression-test/data/variant_github_events_p0/organizationsByTheNumberOfRepositories.out @@ -0,0 +1,9 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !organizationsByTheNumberOfRepositories -- +begriffs 1 +cachethq 1 +cssdream 1 +pathgather 1 +prakhar1989 1 +wasabeef 1 + diff --git a/regression-test/data/variant_github_events_p0/organizationsByTheNumberOfStars.out b/regression-test/data/variant_github_events_p0/organizationsByTheNumberOfStars.out new file mode 100644 index 0000000000..1d8c3833bf --- /dev/null +++ b/regression-test/data/variant_github_events_p0/organizationsByTheNumberOfStars.out @@ -0,0 +1,53 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !organizationsByTheNumberOfStars -- +wasabeef 44 +prakhar1989 32 +google 27 +h0x0er 24 +cachethq 17 +aplus-framework 16 +begriffs 13 +pathgather 13 +cssdream 12 +github 12 +microsoft 12 +netflix 12 +ymatuhin 11 +facebook 10 +gorhill 9 +h5bp 9 +kragniz 9 +qihoo360 9 +sindresorhus 9 +auchenberg 8 +cwrichardkim 8 +docker 8 +lexrus 8 +orangeduck 8 +papers-we-love 8 +projectdiscovery 8 +vinta 8 +atom 7 +dockerboard 7 +goagent 7 +kbandla 7 +nevin31 7 +ossu 7 +square 7 +substack 7 +zhihu 7 +alvarotrigo 6 +apache 6 +billlanyon 6 +inf0rmer 6 +isohuntto 6 +livid 6 +martinothamar 6 +public-apis 6 +rails 6 +spf13 6 +angular 5 +batteringram-dev 5 +cloudflare 5 +d235j 5 + diff --git a/regression-test/data/variant_github_events_p0/proportionsBetweenStarsAndForks1.out b/regression-test/data/variant_github_events_p0/proportionsBetweenStarsAndForks1.out new file mode 100644 index 0000000000..a188bf2ba9 --- /dev/null +++ b/regression-test/data/variant_github_events_p0/proportionsBetweenStarsAndForks1.out @@ -0,0 +1,53 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !proportionsBetweenStarsAndForks1 -- +cssdream/cssgrace 5 12 2.4 +labenuexercicios/objetos-template 5 0 0.0 +lambda81/lambda-addons 5 0 0.0 +octocat/Spoon-Knife 5 0 0.0 +prakhar1989/awesome-courses 5 32 6.4 +udacity/frontend-nanodegree-resume 5 0 0.0 +TheOdinProject/css-exercises 3 1 0.333 +ahmadpanah/Music-instrument 3 1 0.333 +bitcoin/bitcoin 3 0 0.0 +detuks/LeagueSharp 3 0 0.0 +neutron-org/testnets 3 0 0.0 +ranerlich7/library_django 3 0 0.0 +twbs/bootstrap 3 4 1.333 +Araq/Nim 2 0 0.0 +BradLarson/GPUImage 2 2 1.0 +ColeTownsend/Balzac-for-Jekyll 2 0 0.0 +EvgeniiMal/HTML-builder 2 0 0.0 +GAWMiners/paycoin 2 2 1.0 +InjectionDev/LeagueSharp 2 0 0.0 +LarryMad/recipes 2 0 0.0 +Qihoo360/phptrace 2 8 4.0 +SublimeHaskell/SublimeHaskell 2 0 0.0 +TORC2137/2137-2014-roboRIO-Labview 2 0 0.0 +Vanna007/Free-RDP 2 0 0.0 +WhisperSystems/TextSecure 2 0 0.0 +Wynncraft/Issues 2 0 0.0 +adobe/brackets 2 0 0.0 +alexvollmer/daemon-spawn 2 0 0.0 +apache/spark 2 1 0.5 +appacademy/active_record_lite 2 0 0.0 +bundler/bundler 2 0 0.0 +clowwindy/ChinaDNS 2 0 0.0 +cocos2d/cocos2d-x 2 1 0.5 +constjs/SmartThings-Devices 2 0 0.0 +deadlyvipers/dojo_rules 2 0 0.0 +discourse/discourse 2 1 0.5 +django-nonrel/django 2 0 0.0 +django/django 2 1 0.5 +firstcontributions/first-contributions 2 0 0.0 +flarum/core 2 4 2.0 +informatika-19/latihan-backend-19421040-bagasdwijayanto 2 0 0.0 +jculvey/roboto 2 0 0.0 +jlord/patchwork 2 0 0.0 +joinpursuit/8-0-react-hooks-lab 2 0 0.0 +julycoding/The-Art-Of-Programming-By-July 2 1 0.5 +learn-co-curriculum/phase-1-practice-toy-tale 2 0 0.0 +lexrus/VPNOn 2 7 3.5 +mmistakes/so-simple-theme 2 0 0.0 +mrkipling/maraschino 2 0 0.0 +nightscout/cgm-remote-monitor 2 1 0.5 + diff --git a/regression-test/data/variant_github_events_p0/proportionsBetweenStarsAndForks2.out b/regression-test/data/variant_github_events_p0/proportionsBetweenStarsAndForks2.out new file mode 100644 index 0000000000..75a95a6165 --- /dev/null +++ 
b/regression-test/data/variant_github_events_p0/proportionsBetweenStarsAndForks2.out @@ -0,0 +1,3 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !proportionsBetweenStarsAndForks2 -- + diff --git a/regression-test/data/variant_github_events_p0/proportionsBetweenStarsAndForks3.out b/regression-test/data/variant_github_events_p0/proportionsBetweenStarsAndForks3.out new file mode 100644 index 0000000000..6861250a12 --- /dev/null +++ b/regression-test/data/variant_github_events_p0/proportionsBetweenStarsAndForks3.out @@ -0,0 +1,5 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !proportionsBetweenStarsAndForks3 -- +cssdream/cssgrace 5 12 0.42 +prakhar1989/awesome-courses 5 32 0.16 + diff --git a/regression-test/data/variant_github_events_p0/proportionsBetweenStarsAndForks4.out b/regression-test/data/variant_github_events_p0/proportionsBetweenStarsAndForks4.out new file mode 100644 index 0000000000..1d45fd3b9d --- /dev/null +++ b/regression-test/data/variant_github_events_p0/proportionsBetweenStarsAndForks4.out @@ -0,0 +1,4 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !proportionsBetweenStarsAndForks4 -- +1289 4021 3.12 + diff --git a/regression-test/data/variant_github_events_p0/proportionsBetweenStarsAndForks5.out b/regression-test/data/variant_github_events_p0/proportionsBetweenStarsAndForks5.out new file mode 100644 index 0000000000..a0d029cf34 --- /dev/null +++ b/regression-test/data/variant_github_events_p0/proportionsBetweenStarsAndForks5.out @@ -0,0 +1,4 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !proportionsBetweenStarsAndForks5 -- +12 121 10.08 + diff --git a/regression-test/data/variant_github_events_p0/repositoriesByAmountOfModifiedCode.out b/regression-test/data/variant_github_events_p0/repositoriesByAmountOfModifiedCode.out new file mode 100644 index 0000000000..7f81112ac7 --- /dev/null +++ b/regression-test/data/variant_github_events_p0/repositoriesByAmountOfModifiedCode.out @@ -0,0 +1,53 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !repositoriesByAmountOfModifiedCode -- +liraymond04/SocialFi 2 1 14022 8395 +OutOfSyncStudios/memory-cache 1 1 7258 7248 +ToniAnton22/Recipe-App 1 1 4434 9650 +Automattic/newspack-popups 1 1 8364 4205 +webmd-health-services/BuildMasterAutomation 1 1 6968 4646 +volcain-io/exercism.io 1 1 5797 5192 +Acidburn0zzz/foundation.mozilla.org 3 1 8671 1994 +lawhump/lawhump.github.io 1 1 1371 8813 +Dannnno/Quasar 1 1 8770 1255 +Macaulay2/M2 1 1 6311 3485 +skk-dev/ddskk 1 1 4751 4740 +CoderAllan/vscode-dgmlviewer 1 1 4727 4727 +cypress-io/cypress-documentation 1 1 4824 4584 +moneytree-doug/mt-d3 1 1 1510 7898 +SnowSE/project_aspen 1 1 6686 2494 +kubegems/kubegems 1 1 5365 3775 +novus/nvd3 1 1 985 7898 +T145/cubebots 1 1 2420 6340 +UmamiDAO/metrics-api 1 1 3853 3417 +kyungphill/practice_vue 1 1 4606 2663 +jinqshen/greatEffort 1 1 4433 2767 +lyft/clutch 1 1 3100 4031 +esparzou/site_tiledesign 1 1 4466 2663 +LeagueSharp/LeagueSharpCommon 1 1 3 7094 +metasoccer/TokenBondingCurve 1 1 6070 1017 +Mu-L/PaddleHub 1 1 2519 4554 +andrejv/wxmaxima 1 1 3541 3529 +ionos-cloud/module-ansible 1 1 4089 2962 +fishulla/Torque3D 2 1 3539 3409 +mikeyhodl/kubernetes 1 1 3893 2415 +moqimoqidea/Github-Ranking 1 1 4117 1956 +harrisonho99/react-native-windows-samples 2 1 4352 1564 +Shiker2032/chick-chirick 1 1 667 5157 +hto-projects/be-sound 1 1 3904 1598 +nf-core/tools 1 1 4189 1100 +Melon-Tropics/javascript-action 2 1 1764 3371 
+lloydtao/readme-guestbook 2 1 1762 3354 +jdhanotaGU/CRAPS-Game 1 1 2244 2837 +dotiful/netlify-express-api 2 1 2087 2680 +templates-starter/OrchardCore 3 1 2942 1681 +mheap/action-router 1 1 1073 3193 +Torndeco/extdb 1 1 3481 697 +Trestolt/roll20-character-sheets 1 1 2838 1084 +coolsnowwolf/packages 1 1 2077 1836 +Toe-Tet/dmm-cdd 1 1 2582 1265 +maidsafe/MaidSafe-Drive 1 1 1540 2239 +ndelvalle/v-blur 2 1 2330 1146 +mhlabs/cfn-diagram 2 1 463 2894 +Creativice-Oy/graph-sentry 1 1 2584 747 +piwik/piwik 1 1 1979 1339 + diff --git a/regression-test/data/variant_github_events_p0/repositoriesByTheNumberOfPushes.out b/regression-test/data/variant_github_events_p0/repositoriesByTheNumberOfPushes.out new file mode 100644 index 0000000000..a574b61a3d --- /dev/null +++ b/regression-test/data/variant_github_events_p0/repositoriesByTheNumberOfPushes.out @@ -0,0 +1,53 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !repositoriesByTheNumberOfPushes -- +greatfire/wiki 48 1 +westurner/sphinxcontrib-srclinks 16 1 +josh/cafe-js 12 1 +miningforpotatoes/miningforpotatoes.github.io 9 1 +ogupte/trope 9 1 +VictiniX888/Illegal-Mod-Sites 8 1 +enfuse/pixled 7 1 +apache/spark 6 1 +cellier/cellier.github.io 6 1 +LemonPi/sal 5 1 +cleesmith/escalate 5 1 +odoo/odoo 5 1 +twbs/bootstrap 5 2 +bluejamesbond/TextJustify-Android 4 1 +chromium/chromium 4 1 +delner/pink_panther 4 1 +kragniz/json-sempai 4 1 +BackWoodsMod/BackWoods-Dev-Website 3 1 +IBM/pwa-lit-template 3 1 +ShouthostLLC/stripe-d 3 1 +TimmyO18/timmyobiso 3 1 +altipla-consulting/i18n-messages 3 1 +ariddell/lda 3 1 +ben-manes/caffeine 3 1 +bohoomil/fontconfig-ultimate 3 1 +cachethq/Cachet 3 2 +cubiclesoft/barebones-cms-shortcode-bb_syntaxhighlight 3 1 +joelpurra/jqnpm 3 1 +jonsterling/hs-abt 3 1 +lhorie/mithril.js 3 1 +nguyenhongson1902/lunar-lander-solver 3 1 +shurcooL/webgl 3 1 +HazyResearch/deepdive 2 1 +Homebrew/homebrew 2 2 +JetBrains/intellij-community 2 1 +Khan/perseus 2 1 
+MatthewLymer/ScriptMigrations 2 1 +NodeBB/NodeBB 2 1 +Qmunity/QmunityLib 2 1 +StevenXL/learntoprogram 2 1 +ValcambiSuisseNFT/verify-info 2 1 +getlantern/lantern 2 1 +golang/go 2 1 +laurent22/joplin 2 1 +lhorie/mithril 2 1 +literallysame/Festivus-Mode 2 1 +mantarayforensics/mantaray 2 1 +rapid7/metasploit-framework 2 2 +ropensci/webservices 2 1 +rossdylan/eris 2 1 + diff --git a/regression-test/data/variant_github_events_p0/repositoriesWithClickhouse_related_comments1.out b/regression-test/data/variant_github_events_p0/repositoriesWithClickhouse_related_comments1.out new file mode 100644 index 0000000000..46e1f359ae --- /dev/null +++ b/regression-test/data/variant_github_events_p0/repositoriesWithClickhouse_related_comments1.out @@ -0,0 +1,15 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !repositoriesWithClickhouse_related_comments1 -- +apache/spark 14 +TTMTT/iCL0udin 3 +blueboxgroup/ursula 3 +apache/arrow 1 +apache/cassandra-dtest 1 +apache/dolphinscheduler 1 +expressjs/compression 1 +fluo-io/fluo-dev 1 +icret/EasyImages2.0 1 +indiebox/ubos-admin 1 +kanaka/websockify 1 +rstudio/pins-r 1 + diff --git a/regression-test/data/variant_github_events_p0/repositoriesWithClickhouse_related_comments2.out b/regression-test/data/variant_github_events_p0/repositoriesWithClickhouse_related_comments2.out new file mode 100644 index 0000000000..66e1d07e44 --- /dev/null +++ b/regression-test/data/variant_github_events_p0/repositoriesWithClickhouse_related_comments2.out @@ -0,0 +1,15 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !repositoriesWithClickhouse_related_comments2 -- +apache/spark 1 14 +TTMTT/iCL0udin 0 3 +blueboxgroup/ursula 0 3 +apache/arrow 0 1 +apache/cassandra-dtest 0 1 +apache/dolphinscheduler 0 1 +expressjs/compression 0 1 +fluo-io/fluo-dev 0 1 +icret/EasyImages2.0 0 1 +indiebox/ubos-admin 0 1 +kanaka/websockify 0 1 +rstudio/pins-r 0 1 + diff --git a/regression-test/data/variant_github_events_p0/repositoriesWithDoris_related_comments1.out b/regression-test/data/variant_github_events_p0/repositoriesWithDoris_related_comments1.out new file mode 100644 index 0000000000..7f4dbb909d --- /dev/null +++ b/regression-test/data/variant_github_events_p0/repositoriesWithDoris_related_comments1.out @@ -0,0 +1,9 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !repositoriesWithDoris_related_comments1 -- +apache/spark 22 +HabitRPG/habitrpg 1 +python/mypy 1 +rstudio/pins-r 1 +selfhub/selfhub 1 +tesseradata/docs-datadr 1 + diff --git a/regression-test/data/variant_github_events_p0/repositoriesWithDoris_related_comments2.out b/regression-test/data/variant_github_events_p0/repositoriesWithDoris_related_comments2.out new file mode 100644 index 0000000000..c248e312e3 --- /dev/null +++ b/regression-test/data/variant_github_events_p0/repositoriesWithDoris_related_comments2.out @@ -0,0 +1,9 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !repositoriesWithDoris_related_comments2 -- +apache/spark 1 22 +HabitRPG/habitrpg 0 1 +python/mypy 0 1 +rstudio/pins-r 0 1 +selfhub/selfhub 0 1 +tesseradata/docs-datadr 0 1 + diff --git a/regression-test/data/variant_github_events_p0/repositoriesWithTheHighestGrowthYoY.out b/regression-test/data/variant_github_events_p0/repositoriesWithTheHighestGrowthYoY.out new file mode 100644 index 0000000000..d436dae02b --- /dev/null +++ b/regression-test/data/variant_github_events_p0/repositoriesWithTheHighestGrowthYoY.out @@ -0,0 +1,14 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !repositoriesWithTheHighestGrowthYoY -- +avelino/awesome-go 3 1 3.0 2015-01-01T10:01:25 +torvalds/linux 2 1 2.0 2015-01-01T10:13:15 +esl/MongooseIM 1 1 1.0 2015-01-01T10:37:13 +gitlabhq/gitlabhq 1 1 1.0 2015-01-01T10:37:54 +haoel/leetcode 1 1 1.0 2015-01-01T10:34:53 +sindresorhus/awesome 1 2 0.5 2015-01-01T10:33:57 +github/gitignore 1 3 0.333 2015-01-01T10:25:26 +golang/go 1 3 0.333 2015-01-01T08:05:52 +h5bp/Front-end-Developer-Interview-Questions 2 6 0.333 2015-01-01T08:01:30 +leanote/leanote 1 4 0.25 2015-01-01T09:11:54 +prakhar1989/awesome-courses 1 31 0.032 2015-01-01T08:07 + diff --git a/regression-test/data/variant_github_events_p0/repositoriesWithTheMaximumAmountOfIssues1.out b/regression-test/data/variant_github_events_p0/repositoriesWithTheMaximumAmountOfIssues1.out new file mode 100644 index 0000000000..9d395cfb85 --- /dev/null +++ b/regression-test/data/variant_github_events_p0/repositoriesWithTheMaximumAmountOfIssues1.out @@ -0,0 +1,53 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !repositoriesWithTheMaximumAmountOfIssues1 -- +No-CQRT/GooGuns 44 1 +YGGDRASIL-STUDIO/Discouraged-Workers 16 1 +kcaa/kcaa 13 1 +pddemo/demo 11 1 +GHConnIT/system-test-repo-1667812266 10 1 +LeiDeMing/reading 9 1 +backdrop/backdrop-issues 7 3 +DsooRadha/CDMX013-md-links 6 1 +GunZi200/Memory-Colour 6 1 +antonioortegajr/beerfind.me 6 1 +christophercochran/Genesis-Visual-Hook-Guide 6 1 +starakaj/rnecklace 6 1 +GMOD/Apollo 5 1 +g19-mr/azh 5 1 +getparsec/getparsec 5 1 +huntermcmillian/huntermcmillian 5 1 +leo424y/heysiri.ml 5 1 +waltervr/mejengol 5 1 +ApexKO/issue-tracking 4 1 +Baystation12/Baystation12 4 1 +CTC-CompTech/delivery 4 2 +CrafterKina/ExperiencePower 4 1 +RPMTW/RPMTW-Platform-Mod 4 1 +Shyenaia/prework-study-guide 4 1 +cachethq/Cachet 4 3 +djbouche/glowing-bear 4 1 +hzinner/lab-agile-planning 4 1 +ligershark/side-waffle 4 1 +pvandervelde/cloud-jenkins 4 1 +seadog007/EasyCal 4 1 +BryanDeJesus/CSC-251-GroupProject 3 1 +EKGAPI/webAppEKGAPI 3 1 +GiuseppeFilingeri/upgraded-symmetrical-waddle 3 1 +Kindnesscove/kindnesscove 3 1 +MiYa-Solutions/sbcx 3 2 +NamNguyen911/first_app 3 1 +RahalYesser/Training-Management 3 1 +TechCavern/WaveTact 3 1 +TechnicPack/TechnicLauncher 3 3 +Wel-Alves/lab-agile-planning 3 1 +ac-engine/amusement-creators-engine 3 1 +asiekierka/MagnumOpus 3 1 +bikeindex/bike_index 3 1 +campaul/ph.sh 3 2 +captainkirkby/Gears 3 1 +darinmorrison/type-nats.rs 3 1 +endercrest/ColorCube 3 1 +glasklart/hd 3 1 +ivolunteerph/ivolunteerph 3 1 +kalamuna/kalastatic 3 2 + diff --git a/regression-test/data/variant_github_events_p0/repositoriesWithTheMaximumAmountOfIssues2.out b/regression-test/data/variant_github_events_p0/repositoriesWithTheMaximumAmountOfIssues2.out new file mode 100644 index 0000000000..9bdf653671 --- /dev/null +++ b/regression-test/data/variant_github_events_p0/repositoriesWithTheMaximumAmountOfIssues2.out @@ -0,0 +1,53 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !repositoriesWithTheMaximumAmountOfIssues2 -- +No-CQRT/GooGuns 44 1 0 +YGGDRASIL-STUDIO/Discouraged-Workers 16 1 0 +kcaa/kcaa 13 1 0 +pddemo/demo 11 1 0 +GHConnIT/system-test-repo-1667812266 10 1 0 +LeiDeMing/reading 9 1 0 +backdrop/backdrop-issues 7 3 0 +DsooRadha/CDMX013-md-links 6 1 0 +GunZi200/Memory-Colour 6 1 0 +antonioortegajr/beerfind.me 6 1 0 +christophercochran/Genesis-Visual-Hook-Guide 6 1 0 +starakaj/rnecklace 6 1 0 +GMOD/Apollo 5 1 0 +g19-mr/azh 5 1 0 +getparsec/getparsec 5 1 0 +huntermcmillian/huntermcmillian 5 1 0 +leo424y/heysiri.ml 5 1 0 +waltervr/mejengol 5 1 0 +ApexKO/issue-tracking 4 1 0 +Baystation12/Baystation12 4 1 0 +CTC-CompTech/delivery 4 2 0 +CrafterKina/ExperiencePower 4 1 0 +RPMTW/RPMTW-Platform-Mod 4 1 0 +Shyenaia/prework-study-guide 4 1 0 +cachethq/Cachet 4 3 17 +djbouche/glowing-bear 4 1 0 +hzinner/lab-agile-planning 4 1 0 +ligershark/side-waffle 4 1 1 +pvandervelde/cloud-jenkins 4 1 0 +seadog007/EasyCal 4 1 0 +BryanDeJesus/CSC-251-GroupProject 3 1 0 +EKGAPI/webAppEKGAPI 3 1 0 +GiuseppeFilingeri/upgraded-symmetrical-waddle 3 1 0 +Kindnesscove/kindnesscove 3 1 0 +MiYa-Solutions/sbcx 3 2 0 +NamNguyen911/first_app 3 1 0 +RahalYesser/Training-Management 3 1 0 +TechCavern/WaveTact 3 1 0 +TechnicPack/TechnicLauncher 3 3 0 +Wel-Alves/lab-agile-planning 3 1 0 +ac-engine/amusement-creators-engine 3 1 0 +asiekierka/MagnumOpus 3 1 0 +bikeindex/bike_index 3 1 0 +campaul/ph.sh 3 2 0 +captainkirkby/Gears 3 1 0 +darinmorrison/type-nats.rs 3 1 0 +endercrest/ColorCube 3 1 0 +glasklart/hd 3 1 0 +ivolunteerph/ivolunteerph 3 1 0 +kalamuna/kalastatic 3 2 1 + diff --git a/regression-test/data/variant_github_events_p0/repositoriesWithTheMaximumAmountOfIssues3.out b/regression-test/data/variant_github_events_p0/repositoriesWithTheMaximumAmountOfIssues3.out new file mode 100644 index 0000000000..88c4f29596 --- /dev/null +++ 
b/regression-test/data/variant_github_events_p0/repositoriesWithTheMaximumAmountOfIssues3.out @@ -0,0 +1,9 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !repositoriesWithTheMaximumAmountOfIssues3 -- +wasabeef/awesome-android-ui 0 0 35 +prakhar1989/awesome-courses 0 0 32 +Pathgather/please-wait 0 0 13 +cssdream/cssgrace 0 0 12 +begriffs/postgrest 1 1 12 +cachethq/Cachet 4 3 17 + diff --git a/regression-test/data/variant_github_events_p0/repositoriesWithTheMaximumAmountOfIssues4.out b/regression-test/data/variant_github_events_p0/repositoriesWithTheMaximumAmountOfIssues4.out new file mode 100644 index 0000000000..805d935c55 --- /dev/null +++ b/regression-test/data/variant_github_events_p0/repositoriesWithTheMaximumAmountOfIssues4.out @@ -0,0 +1,53 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !repositoriesWithTheMaximumAmountOfIssues4 -- +wasabeef/awesome-android-ui 0 0 35 +prakhar1989/awesome-courses 0 0 32 +Pathgather/please-wait 0 0 13 +cssdream/cssgrace 0 0 12 +Netflix/ice 0 0 9 +gorhill/uBlock 0 0 9 +kragniz/json-sempai 0 0 9 +wasabeef/awesome-android-libraries 0 0 9 +Qihoo360/phptrace 0 0 8 +auchenberg/chrome-devtools-app 0 0 8 +h5bp/Front-end-Developer-Interview-Questions 0 0 8 +papers-we-love/papers-we-love 0 0 8 +vinta/awesome-python 0 0 8 +goagent/goagent 0 0 7 +kbandla/APTnotes 0 0 7 +lexrus/VPNOn 0 0 7 +projectdiscovery/katana 0 0 7 +zhihu/kids 0 0 7 +alvarotrigo/fullPage.js 0 0 6 +dockerboard/dockerboard 0 0 6 +inf0rmer/blanket 0 0 6 +isohuntto/openbay 0 0 6 +livid/v2ex 0 0 6 +martinothamar/Mediator 0 0 6 +ossu/computer-science 0 0 6 +public-apis/public-apis 0 0 6 +rails/rails-perftest 0 0 6 +DovAmir/awesome-design-patterns 0 0 5 +Reactive-Extensions/RxJS 0 0 5 +d235j/360Controller 0 0 5 +fcambus/nginx-resources 0 0 5 +leanote/leanote 0 0 5 +lensterxyz/lenster 0 0 5 +nemoTyrant/manong 0 0 5 +Anchor89/GithubHub 0 0 4 +Byron/gitoxide 0 0 4 
+FelisCatus/SwitchyOmega 0 0 4 +avelino/awesome-go 0 0 4 +flarum/core 0 0 4 +github/gitignore 0 0 4 +golang/go 0 0 4 +google/end-to-end 0 0 4 +greatfire/wiki 0 0 4 +imgix/imgix-emacs 0 0 4 +iojs/io.js 0 0 4 +josh/cafe-js 0 0 4 +neilj/Squire 0 0 4 +orangeduck/libCello 0 0 4 +spf13/hugo 0 0 4 +square/PonyDebugger 0 0 4 + diff --git a/regression-test/data/variant_github_events_p0/repositoriesWithTheMaximumAmountOfPullRequests1.out b/regression-test/data/variant_github_events_p0/repositoriesWithTheMaximumAmountOfPullRequests1.out new file mode 100644 index 0000000000..a3eec38c16 --- /dev/null +++ b/regression-test/data/variant_github_events_p0/repositoriesWithTheMaximumAmountOfPullRequests1.out @@ -0,0 +1,53 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !repositoriesWithTheMaximumAmountOfPullRequests1 -- +0xMoJo7/Snapchain 1 1 +102/eslint-plugin-comment-annotations 1 1 +10sr/junks 1 1 +1367944498/rsshub 1 1 +18F/analytics-proxy-nodejs 1 1 +18F/openFEC 1 1 +23technologies/23ke-charts 1 1 +2yd/rsshub 1 1 +3m4q3m4q/repo1 1 1 +4GeeksAcademy/build-your-full-stack-developer-resume 1 1 +4i5/hello-world 1 1 +6shell/windows_exporter 1 1 +77-A/.net3 1 1 +7ackkkkk/rsshub 1 1 +7comp/android_frameworks_opt_telephony 1 1 +ABf1ag/learne 1 1 +ADCP1/airbnb-backend 1 1 +AKJAW/ApolloDataBuilders 1 1 +ATGardner/OSMExport 1 1 +AY2223S1-CS2103T-F12-2/tp 1 1 +AY2223S1-CS2103T-T11-2/tp 1 1 +AY2223S1-CS2103T-T12-1/tp 1 1 +AY2223S1-CS2103T-T17-1/tp 1 1 +AY2223S1-CS2103T-W15-4/tp 1 1 +AZhur771/TLP 1 1 +AbiaEssienRepos/auto-price-estimation-project 1 1 +AcalaNetwork/safe-config-service 1 1 +AdWerx/pronto-ruby 1 1 +AdamHidvegi/CurrencyC 1 1 +AdoptOpenJDK/openjdk-website 1 1 +Aerendir/bundle-aws-ses-monitor 1 1 +AhmadTanvir/vue_lara 1 1 +Airnow-test/aspnetcore 1 1 +Alan-love/language-server-protocol 1 1 +AlanYe-Dev/rsshub-vercel 1 1 +AlessioPrete/packagetest 1 1 +AletheiaOrg/Aletheia 1 1 +AlexLazareva/sarafan 1 1 
+Alez05/tesla-interface-react-emotion-poc 1 1 +AlipayDocs/open-docs 1 1 +Alisa1106/vividus-starter-project 1 1 +Aliyamuskan/FirstRepo 1 1 +Alttaab/19.3-flask-survey-exercise 1 1 +Amiiiiiiiin/Escaping-Hell 1 1 +AndroidIDEOfficial/AndroidIDE 1 1 +AnhelinaZhurauleva/vividus-hometask2 1 1 +AnkitParte/astute-line-8992 1 1 +AnttiHal/express-harjoitus 1 1 +Apicurio/apicurio-registry 1 1 +Araq/Nim 1 1 + diff --git a/regression-test/data/variant_github_events_p0/repositoriesWithTheMaximumAmountOfPullRequests2.out b/regression-test/data/variant_github_events_p0/repositoriesWithTheMaximumAmountOfPullRequests2.out new file mode 100644 index 0000000000..2f9adf408a --- /dev/null +++ b/regression-test/data/variant_github_events_p0/repositoriesWithTheMaximumAmountOfPullRequests2.out @@ -0,0 +1,53 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !repositoriesWithTheMaximumAmountOfPullRequests2 -- +scalr-automation/terraform-scalr-flat-mirror5 13 7 +woowacourse-precourse/java-baseball 7 7 +scalr-automation/terraform-scalr-flat-mirror4 5 4 +google/it-cert-automation-practice 4 4 +neutron-org/testnets 4 4 +selfhub/selfhub 4 4 +WolseyBankWitness/rediffusion 8 3 +mhutchinson/mhutchinson-distributor 8 3 +CleverRaven/Cataclysm-DDA 4 3 +apache/spark 3 3 +rust-lang/rust 3 3 +tgstation/-tg-station 3 3 +cachethq/Cachet 4 2 +eclipse/birt 4 2 +Homebrew/homebrew-cask 3 2 +octokit/octokit.net 3 2 +openshift/sippy 3 2 +Bhargavhs/GitHubWorkShop 2 2 +EKGAPI/KardiaApp 2 2 +JorgeX/dojo_rules 2 2 +JuliaLang/METADATA.jl 2 2 +KSP-CKAN/NetKAN 2 2 +Merchello/Merchello 2 2 +PCSX2/pcsx2 2 2 +Strukturavaltas-03-Frontend-2022/csapatmunka---angular-webshop-01-3-underground 2 2 +TeamGabriel/gabriel 2 2 +WhisperSystems/TextSecure 2 2 +app-sre/qontract-reconcile 2 2 +azelezovs/auto-bootcamp-setup 2 2 +bioconda/bioconda-recipes 2 2 +cnrancher/dashboard 2 2 +coocoo08/LookSt 2 2 +docker/docs 2 2 +googleapis/gapic-generator-ruby 2 2 +myMarketPg/project-mymarket 2 2 
+php/php-src 2 2 +quarkusio/quarkus-github-bot 2 2 +rspec/rspec-core 2 2 +slothbear/dojo_rules 2 2 +twitter/scalding 2 2 +frogsbreath-games/eucre 28 1 +Youssef1313/samples 21 1 +DataDog/opentelemetry-collector-contrib 17 1 +golden-warning/giraffedraft-server 15 1 +objectiser/opentelemetry-collector-contrib 13 1 +Sanne/testcontainers-java 10 1 +PCMDI/pcmdi_metrics 9 1 +outofcoffee/testcontainers-java 9 1 +pcwiese/opentelemetry-collector-contrib 9 1 +rmfitzpatrick/opentelemetry-collector-contrib 8 1 + diff --git a/regression-test/data/variant_github_events_p0/repositoriesWithTheMaximumNumberOfAcceptedInvitations.out b/regression-test/data/variant_github_events_p0/repositoriesWithTheMaximumNumberOfAcceptedInvitations.out new file mode 100644 index 0000000000..a2f35081cd --- /dev/null +++ b/regression-test/data/variant_github_events_p0/repositoriesWithTheMaximumNumberOfAcceptedInvitations.out @@ -0,0 +1,53 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !repositoriesWithTheMaximumNumberOfAcceptedInvitations -- +loserskater/M8-GPE 1 2 +wasabeef/awesome-android-ui 0 35 +prakhar1989/awesome-courses 0 32 +cachethq/Cachet 0 17 +Pathgather/please-wait 0 13 +begriffs/postgrest 0 12 +cssdream/cssgrace 0 12 +Netflix/ice 0 9 +gorhill/uBlock 0 9 +kragniz/json-sempai 0 9 +wasabeef/awesome-android-libraries 0 9 +Qihoo360/phptrace 0 8 +auchenberg/chrome-devtools-app 0 8 +h5bp/Front-end-Developer-Interview-Questions 0 8 +papers-we-love/papers-we-love 0 8 +vinta/awesome-python 0 8 +goagent/goagent 0 7 +kbandla/APTnotes 0 7 +lexrus/VPNOn 0 7 +projectdiscovery/katana 0 7 +zhihu/kids 0 7 +alvarotrigo/fullPage.js 0 6 +dockerboard/dockerboard 0 6 +inf0rmer/blanket 0 6 +isohuntto/openbay 0 6 +livid/v2ex 0 6 +martinothamar/Mediator 0 6 +ossu/computer-science 0 6 +public-apis/public-apis 0 6 +rails/rails-perftest 0 6 +DovAmir/awesome-design-patterns 0 5 +Reactive-Extensions/RxJS 0 5 +d235j/360Controller 0 5 +fcambus/nginx-resources 0 5 
+leanote/leanote 0 5 +lensterxyz/lenster 0 5 +mastodon/mastodon 0 5 +nemoTyrant/manong 0 5 +Anchor89/GithubHub 0 4 +Byron/gitoxide 0 4 +FelisCatus/SwitchyOmega 0 4 +atom/atom 0 4 +avelino/awesome-go 0 4 +docker/fig 0 4 +facebook/react 0 4 +flarum/core 0 4 +github/gitignore 0 4 +golang/go 0 4 +google/end-to-end 0 4 +greatfire/wiki 0 4 + diff --git a/regression-test/data/variant_github_events_p0/repositoriesWithTheMostPeopleWhoHavePushAccess1.out b/regression-test/data/variant_github_events_p0/repositoriesWithTheMostPeopleWhoHavePushAccess1.out new file mode 100644 index 0000000000..cd6c3f13fd --- /dev/null +++ b/regression-test/data/variant_github_events_p0/repositoriesWithTheMostPeopleWhoHavePushAccess1.out @@ -0,0 +1,53 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !repositoriesWithTheMostPeopleWhoHavePushAccess1 -- +zzzgydi/clash-verge 0 1 +zzarcon/focusable 0 1 +zylmua/xiaomi 0 1 +zwaldowski/blockskit 0 1 +zurb/foundation 0 1 +zulfajuniadi/riverlevel 0 1 +zuiidea/antd-admin 0 1 +zuha/zuha 0 1 +zuffdaddy/dynalamp 0 1 +zorzalerrante/tsundoku 0 1 +zorro3/configargparse 0 1 +zoomhub/zoomhub 0 1 +zombodb/postgres-parser 0 1 +zokis/python--faster-way 0 1 +zlywilk/klipper4a 0 1 +zjunlp/openue 0 1 +ziyasal/node-procexss 0 1 +ziparchive/ziparchive 0 1 +zigeng/c_p 0 1 +zhoudaxiaa/vpn- 0 1 +zhongyang219/trafficmonitor 0 1 +zhenchen/experiment-for-secompax 0 1 +zhanymkanov/fastapi-best-practices 0 1 +zeromq/jeromq 0 1 +zentyal/zentyal 0 1 +zentific/vmidbg 0 1 +zendframework/zendskeletonapplication 0 1 +zenazn/goji 0 1 +zedapp/zed 0 1 +zdavatz/spreadsheet 0 1 +zcweng/togglebutton 0 1 +zasder3/train-clip 0 1 +zarel/pokemon-showdown-client 0 1 +zackkitzmiller/gofish 0 1 +zachwill/flask-engine 0 1 +zacharypatten/dotnet-console-games 0 1 +zacharydubois/s3-sync 0 1 +zacharydubois/ip-updater 0 1 +z411/trackma 0 1 +z-huang/innertune 0 1 +yyuu/pyenv-installer 0 1 +yxymit/dbx1000 0 1 +ywdarklord/go-example 0 1 
+yushen0118/garment_generation 0 1 +yupenghe/methylpy 0 1 +yun-liu/rcf-pytorch 0 1 +yulife/wanderlust-reloaded 0 1 +yorikvanhavre/freecad-library 0 1 +yogiben/meteor-bootstrap 0 1 +yob/pdf-reader 0 1 + diff --git a/regression-test/data/variant_github_events_p0/repositoriesWithTheMostStarsOverOneDay1.out b/regression-test/data/variant_github_events_p0/repositoriesWithTheMostStarsOverOneDay1.out new file mode 100644 index 0000000000..3f2bdf749f --- /dev/null +++ b/regression-test/data/variant_github_events_p0/repositoriesWithTheMostStarsOverOneDay1.out @@ -0,0 +1,53 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !repositoriesWithTheMostStarsOverOneDay1 -- +wasabeef/awesome-android-ui 2015-01-01 35 +prakhar1989/awesome-courses 2015-01-01 31 +cachethq/Cachet 2015-01-01 17 +Pathgather/please-wait 2015-01-01 13 +begriffs/postgrest 2015-01-01 12 +cssdream/cssgrace 2015-01-01 12 +Netflix/ice 2015-01-01 9 +gorhill/uBlock 2015-01-01 9 +kragniz/json-sempai 2015-01-01 9 +wasabeef/awesome-android-libraries 2015-01-01 9 +Qihoo360/phptrace 2015-01-01 8 +auchenberg/chrome-devtools-app 2015-01-01 8 +papers-we-love/papers-we-love 2015-01-01 8 +vinta/awesome-python 2015-01-01 8 +goagent/goagent 2015-01-01 7 +kbandla/APTnotes 2015-01-01 7 +lexrus/VPNOn 2015-01-01 7 +projectdiscovery/katana 2022-11-08 7 +zhihu/kids 2015-01-01 7 +alvarotrigo/fullPage.js 2015-01-01 6 +dockerboard/dockerboard 2015-01-01 6 +h5bp/Front-end-Developer-Interview-Questions 2015-01-01 6 +inf0rmer/blanket 2015-01-01 6 +isohuntto/openbay 2015-01-01 6 +livid/v2ex 2015-01-01 6 +martinothamar/Mediator 2022-11-08 6 +ossu/computer-science 2022-11-08 6 +public-apis/public-apis 2022-11-08 6 +rails/rails-perftest 2015-01-01 6 +DovAmir/awesome-design-patterns 2022-11-08 5 +Reactive-Extensions/RxJS 2015-01-01 5 +d235j/360Controller 2015-01-01 5 +fcambus/nginx-resources 2015-01-01 5 +nemoTyrant/manong 2015-01-01 5 +Anchor89/GithubHub 2015-01-01 4 
+FelisCatus/SwitchyOmega 2015-01-01 4 +atom/atom 2015-01-01 4 +docker/fig 2015-01-01 4 +facebook/react 2015-01-01 4 +flarum/core 2015-01-01 4 +google/end-to-end 2015-01-01 4 +greatfire/wiki 2015-01-01 4 +imgix/imgix-emacs 2015-01-01 4 +iojs/io.js 2015-01-01 4 +josh/cafe-js 2015-01-01 4 +leanote/leanote 2015-01-01 4 +neilj/Squire 2015-01-01 4 +orangeduck/libCello 2015-01-01 4 +spf13/hugo 2015-01-01 4 +square/PonyDebugger 2015-01-01 4 + diff --git a/regression-test/data/variant_github_events_p0/repositoriesWithTheMostSteadyGrowthOverTime.out b/regression-test/data/variant_github_events_p0/repositoriesWithTheMostSteadyGrowthOverTime.out new file mode 100644 index 0000000000..4afc1de3f9 --- /dev/null +++ b/regression-test/data/variant_github_events_p0/repositoriesWithTheMostSteadyGrowthOverTime.out @@ -0,0 +1,53 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !repositoriesWithTheMostSteadyGrowthOverTime -- +Pierian-Data/Complete-Python-3-Bootcamp 1 2 2.0 +avelino/awesome-go 2 4 2.0 +charmbracelet/vhs 1 2 2.0 +esl/MongooseIM 1 2 2.0 +gitlabhq/gitlabhq 1 2 2.0 +haoel/leetcode 1 2 2.0 +httpie/httpie 1 2 2.0 +laurent22/joplin 1 2 2.0 +starkscan/starkscan-verifier 1 2 2.0 +stashapp/stash 1 2 2.0 +lensterxyz/lenster 3 5 1.6666666666666667 +mastodon/mastodon 3 5 1.6666666666666667 +sindresorhus/awesome 2 3 1.5 +torvalds/linux 2 3 1.5 +trinib/Linux-Bash-Commands 2 3 1.5 +Byron/gitoxide 3 4 1.3333333333333333 +github/gitignore 3 4 1.3333333333333333 +golang/go 3 4 1.3333333333333333 +h5bp/Front-end-Developer-Interview-Questions 6 8 1.3333333333333333 +leanote/leanote 4 5 1.25 +prakhar1989/awesome-courses 31 32 1.032258064516129 +0fflinexd/Calculator 1 1 1.0 +0x192/universal-android-debloater 1 1 1.0 +0x4a6965/VitamioDemo 1 1 1.0 +0xabad1dea/Christmas-Card-2014 1 1 1.0 +18F/midas 1 1 1.0 +19128785540/rxrw-daily_morning 1 1 1.0 +1uphealth/fhir-react 1 1 1.0 +22century/bot-project 1 1 1.0 +2captcha/2captcha-go 1 1 1.0 
+3dd13/sample-nw 1 1 1.0 +3m1o/nginx-rtmp-monitoring 1 1 1.0 +42wim/matterbridge 1 1 1.0 +47deg/labs-scala-play-mongo 1 1 1.0 +4lessandrodev/finance-project-ddd 1 1 1.0 +4u4v/ThinkPHP_Backend_System 1 1 1.0 +52inc/learn-ios 1 1 1.0 +6si/shipwright 1 1 1.0 +71104/lambda 1 1 1.0 +9inevolt/betterdgg 1 1 1.0 +AI-Guru/music-generation-research 1 1 1.0 +AUTOMATIC1111/stable-diffusion-webui 1 1 1.0 +AVGP/terminal.js 1 1 1.0 +AbdelrhmanHamouda/locust-k8s-operator 1 1 1.0 +Abecarne/Epitech 1 1 1.0 +Ableton/LinkKit 1 1 1.0 +Activiti/Activiti 1 1 1.0 +AdamNowotny/BuildReactor 1 1 1.0 +AdguardTeam/AdGuardHome 1 1 1.0 +AgentMaker/Paddle-CLIP 1 1 1.0 + diff --git a/regression-test/data/variant_github_events_p0/repositoriesWithTheWorstStagnation_order.out b/regression-test/data/variant_github_events_p0/repositoriesWithTheWorstStagnation_order.out new file mode 100644 index 0000000000..eaa089dd02 --- /dev/null +++ b/regression-test/data/variant_github_events_p0/repositoriesWithTheWorstStagnation_order.out @@ -0,0 +1,9 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !repositoriesWithTheWorstStagnation_order -- +github/gitignore 1 3 0.333 2015-01-01T10:25:26 +golang/go 1 3 0.333 2015-01-01T08:05:52 +h5bp/Front-end-Developer-Interview-Questions 2 6 0.333 2015-01-01T08:01:30 +leanote/leanote 1 4 0.25 2015-01-01T09:11:54 +prakhar1989/awesome-courses 1 31 0.032 2015-01-01T08:07 +sindresorhus/awesome 1 2 0.5 2015-01-01T10:33:57 + diff --git a/regression-test/data/variant_github_events_p0/repositoryAffinityList1.out b/regression-test/data/variant_github_events_p0/repositoryAffinityList1.out new file mode 100644 index 0000000000..bd05ae6ab5 --- /dev/null +++ b/regression-test/data/variant_github_events_p0/repositoryAffinityList1.out @@ -0,0 +1,52 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !repositoryAffinityList1 -- +prakhar1989/awesome-courses 32 +wasabeef/awesome-android-ui 8 +vinta/awesome-python 3 +wasabeef/awesome-android-libraries 3 +fcambus/nginx-resources 2 +papers-we-love/papers-we-love 2 +Activiti/Activiti 1 +Cydrobolt/polr 1 +Developer-Y/cs-video-courses 1 +FortAwesome/Font-Awesome 1 +Netflix/ice 1 +PHPMailer/PHPMailer 1 +Semantic-Org/Semantic-UI 1 +StevenSLXie/Tutorials-for-Web-Developers 1 +alphagov/government-service-design-manual 1 +alvarotrigo/fullPage.js 1 +angular/angular.js 1 +apache/spark 1 +ben-manes/caffeine 1 +benplummer/calendarful 1 +cachethq/Cachet 1 +deshack/pure-less 1 +digitalnature/php-highlight 1 +digitalnature/php-ref 1 +dingo/api 1 +dkhamsing/ios-asset-names 1 +drrb/java-rust-example 1 +flarum/core 1 +foreverjs/forever 1 +gdi2290/angular-websocket 1 +github/gitignore 1 +google/google-api-php-client 1 +gorhill/uBlock 1 +gulpjs/gulp 1 +guzzle/guzzle 1 +iojs/io.js 1 +isohuntto/openbay 1 +iverberk/larasearch 1 +jenssegers/laravel-agent 1 +jenssegers/laravel-mongodb 1 +jsvd/cv 1 +pgmodeler/pgmodeler 1 +serbanghita/Mobile-Detect 1 +thephpleague/csv 1 +thephpleague/flysystem 1 +torvalds/linux 1 +twbs/bootstrap 1 +vhf/free-programming-books 1 +zurb/foundation 1 + diff --git a/regression-test/data/variant_github_events_p0/repositoryAffinityList2.out b/regression-test/data/variant_github_events_p0/repositoryAffinityList2.out new file mode 100644 index 0000000000..67482925cf --- /dev/null +++ b/regression-test/data/variant_github_events_p0/repositoryAffinityList2.out @@ -0,0 +1,9 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !repositoryAffinityList2 -- +Pathgather/please-wait 13 0.08 +begriffs/postgrest 12 0.08 +cssdream/cssgrace 12 0.08 +cachethq/Cachet 16 0.06 +prakhar1989/awesome-courses 32 0.03 +wasabeef/awesome-android-ui 35 0.03 + diff --git a/regression-test/data/variant_github_events_p0/starsFromHeavyGithubUsers1.out b/regression-test/data/variant_github_events_p0/starsFromHeavyGithubUsers1.out new file mode 100644 index 0000000000..bbe5da89e0 --- /dev/null +++ b/regression-test/data/variant_github_events_p0/starsFromHeavyGithubUsers1.out @@ -0,0 +1,31 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !starsFromHeavyGithubUsers1 -- +Automattic/pocket-casts-android 1 +BYVoid/Batsh 1 +Crzyrndm/FilterExtension 1 +JeffreyWay/Laravel-Model-Validation 1 +MegaBits/SIOSocket 1 +Microsoft/dotnet 1 +Qihoo360/phptrace 1 +SFTtech/openage 1 +arturadib/shelljs 1 +cakephp/cakepackages 1 +cakephp/cakephp-codesniffer 1 +cakephp/csfnavbar 1 +chef-workflow/chef-workflow-example 1 +d235j/360Controller 1 +enaqx/awesome-react 1 +hamstergene/pathmatch 1 +jackc/pgx 1 +jesyspa/book 1 +jonsterling/intersection-types-primer 1 +josegonzalez/cakephp-datatable 1 +lorenzo/slugger 1 +msabramo/setuptools-markdown 1 +nvd3-community/nvd3 1 +opscode/chef 1 +rackt/react-router 1 +serialhex/nano-highlight 1 +sindresorhus/jshint-stylish 1 +xenith-studios/ataxia 1 + diff --git a/regression-test/data/variant_github_events_p0/starsFromHeavyGithubUsers2.out b/regression-test/data/variant_github_events_p0/starsFromHeavyGithubUsers2.out new file mode 100644 index 0000000000..ea9bfe0165 --- /dev/null +++ b/regression-test/data/variant_github_events_p0/starsFromHeavyGithubUsers2.out @@ -0,0 +1,10 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !starsFromHeavyGithubUsers2 -- +MegaBits/SIOSocket 1 +Qihoo360/phptrace 1 +chef-workflow/chef-workflow-example 1 +jackc/pgx 1 +nvd3-community/nvd3 1 +opscode/chef 1 +xenith-studios/ataxia 1 + diff --git a/regression-test/data/variant_github_events_p0/theLongestRepositoryNames1.out b/regression-test/data/variant_github_events_p0/theLongestRepositoryNames1.out new file mode 100644 index 0000000000..026117bde5 --- /dev/null +++ b/regression-test/data/variant_github_events_p0/theLongestRepositoryNames1.out @@ -0,0 +1,53 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !theLongestRepositoryNames1 -- +1 the-aws-terraform-samples/terraform-route-53-application-recovery-controller-codepipeline-with-terraform +1 Ayusha-Bhola/-Kisaan-Sahayak-Intelligent-Farmers-e-Marketplace-with-prediction-of-crop-risk-factors. +1 danielPoloWork/EURIS-academy2022-meterial-solidPrinciplesAndDesignPatterns +1 ShadmanShariar/CRUD_Operation_On_Firebase_Database_Using_JavaScript +1 lakshay-arora/Densenet121-Image-Classification-Deployed-using-Flask +1 ArthurZC23/Machine-Learning-A-Probabilistic-Perspective-Solutions +1 deepaktiwari88/HR-Management-and-Geo-Attendance-System-Admin-App +1 HariharanGopinath/Generate-Music-using-a-LSTM-Neural-Network +1 jpsarda/Pixel-based-destructible-ground-with-Cocos2d-iPhone +1 sudharsan13296/Hands-On-Reinforcement-Learning-With-Python +1 SN-RECIT-formation-a-distance/moodle-local_recitdashboard +1 cubiclesoft/barebones-cms-shortcode-bb_syntaxhighlight +1 xilinxfairchild/FPGABasedHighPerformanceTargetChecking +1 NerijusBartosevicius/laravel-insert-update-delete-ids +1 electron-react-boilerplate/electron-react-boilerplate +2 AttackOnDobby/iOS-Core-Animation-Advanced-Techniques +1 Lysergic-Acid/android_device_samsung_galaxys2-common +1 MicrosoftLearning/AZ-104-MicrosoftAzureAdministrator +1 mikeycal/the-video-editors-render-script-for-blender +1 
Nevin31/Classification-of-Wisconson-Cancer-Dataset +1 centralnicgroup-opensource/rtldev-middleware-whmcs +1 AmdjedSanero/CodePostal-58-Wilaya-Algerie-With-JS +2 EnterpriseQualityCoding/FizzBuzzEnterpriseEdition +1 SamyPesse/How-to-Make-a-Computer-Operating-System +1 godot-extended-libraries/godot-antialiased-line2d +1 ArnaudBarre/vite-plugin-react-click-to-component +1 Siegener-Anime-und-Manga-Treff-SAMT/SAMT-Website +1 the-aws-terraform-samples/terraform-eks-jumphost +1 RajneeshSingh007/College-Management-Android-App +1 akoskm/vite-react-tailwindcss-browser-extension +1 batteringram-dev/Data-Structures-and-Algorithms +2 billlanyon/js-therapist-react-node-mongo-docker +1 google-github-actions/get-secretmanager-secrets +1 python-semantic-release/python-semantic-release +1 Juan-Carlos-Estevez-Vargas/Estevez-Corporation +1 Learn-Dev/Learn-Dev-Theme---Dashboard-partie-1 +1 rafaelsilverioit/twitter-django-rest-framework +1 webacademyufac/programacao-avancada-backend-t2 +1 BlueRaja/Weighted-Item-Randomizer-for-C-Sharp +1 GoogleCloudPlatform/compute-video-demo-puppet +1 conal/talk-2014-lambdajam-denotational-design +1 elasticsearch/elasticsearch-analysis-kuromoji +1 miningforpotatoes/miningforpotatoes.github.io +1 singwhatiwanna/PinnedHeaderExpandableListView +1 Ebazhanov/linkedin-skill-assessments-quizzes +1 PacktPublishing/ASP.NET-Core-5-for-Beginners +1 Schweinepriester/github-profile-achievements +1 abhisheknaiidu/awesome-github-profile-readme +1 adrianhajdin/project_modern_ui_ux_restaurant +1 billlanyon/js-reform-beauty-node-express-poc + diff --git a/regression-test/data/variant_github_events_p0/theLongestRepositoryNames2.out b/regression-test/data/variant_github_events_p0/theLongestRepositoryNames2.out new file mode 100644 index 0000000000..36e3e58aaf --- /dev/null +++ b/regression-test/data/variant_github_events_p0/theLongestRepositoryNames2.out @@ -0,0 +1,53 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !theLongestRepositoryNames2 -- +ent/ent 1 +h2o/h2o 1 +jsvd/cv 1 +jsx/JSX 1 +lxc/lxc 1 +lxc/lxd 1 +antfu/ni 1 +gburd/pt 1 +gwoo/hap 1 +hmml/ev3 1 +pkg/sftp 1 +svg/svgo 2 +yui/yui3 2 +18F/midas 1 +Erol/yomu 1 +aasm/aasm 1 +akka/akka 2 +appc/spec 1 +atom/atom 4 +dingo/api 1 +dlwh/puck 1 +fuel/fuel 1 +gazay/gon 1 +glfw/glfw 1 +golang/go 4 +harelba/q 1 +iauns/cpm 1 +iron/iron 1 +ix/kyr.li 1 +jackc/pgx 1 +jlnr/gosu 1 +koajs/koa 1 +koush/ion 1 +kr/pretty 1 +lg/murder 1 +lvgl/lvgl 1 +norx/NORX 1 +odoo/odoo 1 +ossu/math 1 +peco/peco 1 +phan/phan 1 +prql/prql 1 +pyjs/pyjs 1 +rack/rack 2 +tux3/qTox 1 +unjs/ungh 1 +vuejs/vue 1 +zuha/Zuha 1 +BVLC/caffe 1 +DomKM/silk 1 + diff --git a/regression-test/data/variant_github_events_p0/theMostToughCodeReviews.out b/regression-test/data/variant_github_events_p0/theMostToughCodeReviews.out new file mode 100644 index 0000000000..6e6f3e7c13 --- /dev/null +++ b/regression-test/data/variant_github_events_p0/theMostToughCodeReviews.out @@ -0,0 +1,53 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !theMostToughCodeReviews -- +https://github.com/d3athrow/vgstation13/pull/ 4 +https://github.com/CleverRaven/Cataclysm-DDA/pull/ 3 +https://github.com/apache/spark/pull/ 3 +https://github.com/dotnet/runtime/pull/ 3 +https://github.com/facebookincubator/velox/pull/ 3 +https://github.com/kubernetes/kubernetes/pull/ 3 +https://github.com/rust-lang/rfcs/pull/ 3 +https://github.com/sass/libsass/pull/ 3 +https://github.com/selfhub/selfhub/pull/ 3 +https://github.com/tgstation/-tg-station/pull/ 3 +https://github.com/AndreasMadsen/steer-screenshot/pull/ 2 +https://github.com/CienProject2014/OneLevelHero/pull/ 2 +https://github.com/JuliaLang/julia/pull/ 2 +https://github.com/OpenRA/OpenRA/pull/ 2 +https://github.com/RuddockHouse/RuddockWebsite/pull/ 2 +https://github.com/SirCmpwn/ChatSharp/pull/ 2 +https://github.com/YaleSTC/reservations/pull/ 2 +https://github.com/ankidroid/Anki-Android/pull/ 2 +https://github.com/apache/airflow/pull/ 2 +https://github.com/buildbot/buildbot-infra/pull/ 2 +https://github.com/cachethq/Cachet/pull/ 2 +https://github.com/carymrobbins/intellij-haskforce/pull/ 2 +https://github.com/coreycondardo/30-Day-Rule/pull/ 2 +https://github.com/docker-library/docs/pull/ 2 +https://github.com/elastic/kibana/pull/ 2 +https://github.com/grafana/grafana/pull/ 2 +https://github.com/hashintel/hash/pull/ 2 +https://github.com/home-assistant/core/pull/ 2 +https://github.com/mongodb-js/compass/pull/ 2 +https://github.com/mupen64plus/mupen64plus-video-glide64mk2/pull/ 2 +https://github.com/napari/napari/pull/ 2 +https://github.com/odoo/odoo/pull/ 2 +https://github.com/percona/pmm/pull/ 2 +https://github.com/risingwavelabs/risingwave/pull/ 2 +https://github.com/rspec/rspec-core/pull/ 2 +https://github.com/rspec/rspec-rails/pull/ 2 +https://github.com/sebastianbergmann/phpunit/pull/ 2 +https://github.com/sourcegraph/sourcegraph/pull/ 2 +https://github.com/sourcegraph/srclib/pull/ 2 
+https://github.com/square/okhttp/pull/ 2 +https://github.com/substack/tape/pull/ 2 +https://github.com/tsuru/tsuru/pull/ 2 +https://github.com/venmo/synx/pull/ 2 +https://github.com/01-edu/public/pull/ 1 +https://github.com/42AGV/ft_transcendence/pull/ 1 +https://github.com/ADCP1/airbnb-backend/pull/ 1 +https://github.com/AMReX-Combustion/PelePhysics/pull/ 1 +https://github.com/AbsaOSS/spline/pull/ 1 +https://github.com/ActiveState/cli/pull/ 1 +https://github.com/Adyen/adyen-dotnet-api-library/pull/ 1 + diff --git a/regression-test/data/variant_github_events_p0/theTotalNumberOfRepositoriesOnGithub.out b/regression-test/data/variant_github_events_p0/theTotalNumberOfRepositoriesOnGithub.out new file mode 100644 index 0000000000..5b3c27542e --- /dev/null +++ b/regression-test/data/variant_github_events_p0/theTotalNumberOfRepositoriesOnGithub.out @@ -0,0 +1,4 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !theTotalNumberOfRepositoriesOnGithub -- +31481 + diff --git a/regression-test/data/variant_github_events_p0/theTotalNumberOfUsersOnGithub1.out b/regression-test/data/variant_github_events_p0/theTotalNumberOfUsersOnGithub1.out new file mode 100644 index 0000000000..9fc9540502 --- /dev/null +++ b/regression-test/data/variant_github_events_p0/theTotalNumberOfUsersOnGithub1.out @@ -0,0 +1,4 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !theTotalNumberOfUsersOnGithub1 -- +26724 + diff --git a/regression-test/data/variant_github_events_p0/theTotalNumberOfUsersOnGithub2.out b/regression-test/data/variant_github_events_p0/theTotalNumberOfUsersOnGithub2.out new file mode 100644 index 0000000000..37b63bc00b --- /dev/null +++ b/regression-test/data/variant_github_events_p0/theTotalNumberOfUsersOnGithub2.out @@ -0,0 +1,4 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !theTotalNumberOfUsersOnGithub2 -- +2763 + diff --git a/regression-test/data/variant_github_events_p0/theTotalNumberOfUsersOnGithub3.out b/regression-test/data/variant_github_events_p0/theTotalNumberOfUsersOnGithub3.out new file mode 100644 index 0000000000..f3b520f20b --- /dev/null +++ b/regression-test/data/variant_github_events_p0/theTotalNumberOfUsersOnGithub3.out @@ -0,0 +1,4 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !theTotalNumberOfUsersOnGithub3 -- +16510 + diff --git a/regression-test/data/variant_github_events_p0/theTotalNumberOfUsersOnGithub4.out b/regression-test/data/variant_github_events_p0/theTotalNumberOfUsersOnGithub4.out new file mode 100644 index 0000000000..11f9d28e41 --- /dev/null +++ b/regression-test/data/variant_github_events_p0/theTotalNumberOfUsersOnGithub4.out @@ -0,0 +1,4 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !theTotalNumberOfUsersOnGithub4 -- +1309 + diff --git a/regression-test/data/variant_github_events_p0/topRepositoriesByStars.out b/regression-test/data/variant_github_events_p0/topRepositoriesByStars.out new file mode 100644 index 0000000000..47450b19e5 --- /dev/null +++ b/regression-test/data/variant_github_events_p0/topRepositoriesByStars.out @@ -0,0 +1,53 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !topRepositoriesByStars -- +wasabeef/awesome-android-ui 35 +prakhar1989/awesome-courses 32 +cachethq/Cachet 17 +Pathgather/please-wait 13 +begriffs/postgrest 12 +cssdream/cssgrace 12 +Netflix/ice 9 +gorhill/uBlock 9 +kragniz/json-sempai 9 +wasabeef/awesome-android-libraries 9 +Qihoo360/phptrace 8 +auchenberg/chrome-devtools-app 8 +h5bp/Front-end-Developer-Interview-Questions 8 +papers-we-love/papers-we-love 8 +vinta/awesome-python 8 +goagent/goagent 7 +kbandla/APTnotes 7 +lexrus/VPNOn 7 +projectdiscovery/katana 7 +zhihu/kids 7 +alvarotrigo/fullPage.js 6 +dockerboard/dockerboard 6 +inf0rmer/blanket 6 +isohuntto/openbay 6 +livid/v2ex 6 +martinothamar/Mediator 6 +ossu/computer-science 6 +public-apis/public-apis 6 +rails/rails-perftest 6 +DovAmir/awesome-design-patterns 5 +Reactive-Extensions/RxJS 5 +d235j/360Controller 5 +fcambus/nginx-resources 5 +leanote/leanote 5 +lensterxyz/lenster 5 +mastodon/mastodon 5 +nemoTyrant/manong 5 +Anchor89/GithubHub 4 +Byron/gitoxide 4 +FelisCatus/SwitchyOmega 4 +atom/atom 4 +avelino/awesome-go 4 +docker/fig 4 +facebook/react 4 +flarum/core 4 +github/gitignore 4 +golang/go 4 +google/end-to-end 4 +greatfire/wiki 4 +imgix/imgix-emacs 4 + diff --git a/regression-test/data/variant_github_events_p0/whatIsTheBestDayOfTheWeekToCatchAStar.out b/regression-test/data/variant_github_events_p0/whatIsTheBestDayOfTheWeekToCatchAStar.out new file mode 100644 index 0000000000..bc6d2c790e --- /dev/null +++ b/regression-test/data/variant_github_events_p0/whatIsTheBestDayOfTheWeekToCatchAStar.out @@ -0,0 +1,6 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !whatIsTheBestDayOfTheWeekToCatchAStar -- +2 366 +3 930 +5 2725 + diff --git a/regression-test/data/variant_github_events_p0/whoAreAllThosePeopleGivingStars1.out b/regression-test/data/variant_github_events_p0/whoAreAllThosePeopleGivingStars1.out new file mode 100644 index 0000000000..ceb681760b --- /dev/null +++ b/regression-test/data/variant_github_events_p0/whoAreAllThosePeopleGivingStars1.out @@ -0,0 +1,53 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !whoAreAllThosePeopleGivingStars1 -- +daweedkob 94 +cliffordfajardo 39 +iloveyuedu 27 +GameCracker 24 +cdleon 24 +whatcool 24 +lootnath 21 +mmestrovic 21 +EyuCoder 19 +raygerrard 16 +miketahani 15 +45H 12 +kazaky 12 +Malerator 11 +zwm5000 10 +cceasy 9 +gotlium 9 +kevindhawkins 9 +Godoctors 8 +jacsonLee 8 +jerson 8 +Github5201314 7 +Nuvini 7 +abhijit1990 7 +bchoomnuan 7 +fengdou902 7 +jameswfoster 7 +lmumar 7 +takuan-osho 7 +zx48 7 +DanielRuf 6 +IssamElbaytam 6 +Jerzerak 6 +ShovelCode 6 +aculich 6 +billlanyon 6 +co-sh 6 +darkpixel 6 +ivan4th 6 +railsjedi 6 +stonelasley 6 +x140yu 6 +DavidAlphaFox 5 +IanLuo 5 +JosephCastro 5 +MedG1 5 +Mrkavindu 5 +Narno 5 +andtxr 5 +athosss23 5 + diff --git a/regression-test/data/variant_github_events_p0/whoAreAllThosePeopleGivingStars2.out b/regression-test/data/variant_github_events_p0/whoAreAllThosePeopleGivingStars2.out new file mode 100644 index 0000000000..cf1765406f --- /dev/null +++ b/regression-test/data/variant_github_events_p0/whoAreAllThosePeopleGivingStars2.out @@ -0,0 +1,4 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !whoAreAllThosePeopleGivingStars2 -- +cliffordfajardo 39 + diff --git a/regression-test/data/variant_github_events_p0/whoAreAllThosePeopleGivingStars3.out b/regression-test/data/variant_github_events_p0/whoAreAllThosePeopleGivingStars3.out new file mode 100644 index 0000000000..5c7968ad12 --- /dev/null +++ b/regression-test/data/variant_github_events_p0/whoAreAllThosePeopleGivingStars3.out @@ -0,0 +1,41 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !whoAreAllThosePeopleGivingStars3 -- +htmlpreview/htmlpreview.github.com 2 +KuiKui/Octosplit 1 +Nuclides/github-highlight-selected 1 +RReverser/github-editorconfig 1 +Yatser/prettypullrequests 1 +adamburmister/gitprint.com 1 +alexcpendleton/GithubForkConfirmation 1 +anasnakawa/chrome-github-avatars 1 +batmanimal/object-oriented-js 1 +benbernard/CommentTracker 1 +buunguyen/octotree 1 +buunguyen/topbar 1 +camsong/chrome-github-mate 1 +chancancode/blame_parent 1 +cisox/github-approve-deny 1 +dlo/github-issue-filter-chrome-extension 1 +evilbuck/pr-sanity 1 +jasonlong/isometric-contributions 1 +jcouyang/gira 1 +johan/github-improved 1 +lxe/require-navigator 1 +mebjas/github-report 1 +mebjas/movie-name-extractor 1 +mesuutt/github-annotator 1 +mikedougherty/chrome-commit-status 1 +msolomon/github-submodule-links 1 +petebacondarwin/github-pr-helper 1 +rudids/js_sequence_extension 1 +sindresorhus/github-hide-files 1 +sindresorhus/github-issues-all 1 +sindresorhus/github-tab-size 1 +sirkitree/github-issue-utils 1 +skidding/github-issue-template 1 +sqren/github-widescreen 1 +summerblue/github-toc 1 +thieman/github-selfies 1 +typpo/codenav 1 +vieux/github-lgtm 1 + diff --git a/regression-test/data/variant_p0/column_name.out b/regression-test/data/variant_p0/column_name.out new file mode 100644 index 0000000000..7d3794e453 --- /dev/null +++ b/regression-test/data/variant_p0/column_name.out @@ -0,0 +1,22 @@ +-- This file 
is automatically generated. You should know what you did if you want to edit this +-- !sql -- +中文 \N + +-- !sql -- +"" + +-- !sql -- +\N +"11111" + +-- !sql -- +\N +\N +456 + +-- !sql -- +\N \N +\N \N +\N \N +UPPER CASE lower case + diff --git a/regression-test/data/variant_p0/complexjson.out b/regression-test/data/variant_p0/complexjson.out new file mode 100644 index 0000000000..d5a5154c67 --- /dev/null +++ b/regression-test/data/variant_p0/complexjson.out @@ -0,0 +1,17 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !sql -- +1 {"key_0":[{"key_1":[{"key_3":[{"key_7":1025,"key_6":25.5,"key_4":1048576,"key_5":0.0001048576},{"key_7":2,"key_6":"","key_4":null}]}]},{"key_1":[{"key_3":[{"key_7":-922337203685477600.0,"key_6":"aqbjfiruu","key_5":-1},{"key_7":65537,"key_6":"","key_4":""}]},{"key_3":[{"key_7":21474836.48,"key_4":"ghdqyeiom","key_5":1048575}]}]}],"id":1} + +-- !sql -- +1 {"key_1":[{"key_2":[{"key_3":[{"key_8":65537},{"key_4":[{"key_5":-0.02},{"key_7":1023},{"key_7":1,"key_6":9223372036854775807}]},{"key_4":[{"key_7":65537,"key_6":null}]}]}]}],"id":1} + +-- !sql -- +1 {"key_0":{"key_4":1,"key_1":{"key_2":1025,"key_3":1},"key_5":256},"key_11":"anve","key_10":65536} +2 {"key_0":[{"key_12":"buwvq","key_11":2.55e-8}]} + +-- !sql -- +1 {"key_0":[{"key_1":{"key_2":[1,2,3],"key_8":"sffjx"},"key_10":65535,"key_0":-1},{"key_10":10.23,"key_0":922337203.685}],"id":1} + +-- !sql -- +1 {"key_0":[{"key_1":[{"key_2":{"key_3":[{"key_4":255},{"key_4":65535},{"key_7":255,"key_6":3}],"key_5":[{"key_7":"nnpqx","key_6":1},{"key_7":255,"key_6":3}]}}]}],"id":1} + diff --git a/regression-test/data/variant_p0/delete.json b/regression-test/data/variant_p0/delete.json new file mode 100644 index 0000000000..a40687b133 --- /dev/null +++ b/regression-test/data/variant_p0/delete.json @@ -0,0 +1 @@ +{"k" : 1, "__DORIS_DELETE_SIGN__": 1} diff --git a/regression-test/data/variant_p0/delete_update.out 
b/regression-test/data/variant_p0/delete_update.out new file mode 100644 index 0000000000..be66efb82e --- /dev/null +++ b/regression-test/data/variant_p0/delete_update.out @@ -0,0 +1,10 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !sql -- +2 {"updated_value":123} +3 {"c":3.0,"a":3,"b":[3]} +4 {"c":4.0,"a":4,"b":[4]} +5 {"c":5.0,"a":5,"b":[5]} + +-- !sql -- +2 {"updated_value":123} {"updated_value":123} + diff --git a/regression-test/data/variant_p0/insert_into_select.out b/regression-test/data/variant_p0/insert_into_select.out new file mode 100644 index 0000000000..ea1714d2ca --- /dev/null +++ b/regression-test/data/variant_p0/insert_into_select.out @@ -0,0 +1,30 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !sql -- +1 [1] 1 +1 [1] 1 +1 [1] 1 +1 [1] 1 +2 [1] 2 +2 [1] 2 +2 [1] 2 +2 [1] 2 +3 [3] 3 +3 [3] 3 +3 [3] 3 +3 [3] 3 +4 [4] 4 +4 [4] 4 +4 [4] 4 +4 [4] 4 +5 [5] 5 +5 [5] 5 +5 [5] 5 +5 [5] 5 + +-- !sql -- +{"c":1.0,"a":1,"b":[1]} +{"c":2.0,"a":2,"b":[1]} +{"c":3.0,"a":3,"b":[3]} +{"c":4.0,"a":4,"b":[4]} +{"c":5.0,"a":5,"b":[5]} + diff --git a/regression-test/data/variant_p0/load.out b/regression-test/data/variant_p0/load.out new file mode 100644 index 0000000000..3b9cdd658b --- /dev/null +++ b/regression-test/data/variant_p0/load.out @@ -0,0 +1,261 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !sql -- +1 [1, 2, null] +1 [1] +1 [1] +1 [null] + +-- !sql -- +27 + +-- !sql -- + +-- !sql -- +16 + +-- !sql -- +{"c":"123"} +{"c":123} +{"cc":[123.0]} +{"cc":[123.1]} +{"ccc":123} +{"ccc":123321} +{"cccc":123.0} +{"cccc":123.11} +{"ccccc":[123]} +{"ccccc":[123456789]} +{"b":1111111111111111} +{"b":1.222222} +{"bb":1} +{"bb":214748364711} +{"A":1} + +-- !sql -- +15 + +-- !sql_2 -- +123456789101112 +1 +123 +123456 + +-- !sql_4 -- +123 \N {"A":123} +1 \N {"A":1} +123456 \N {"A":123456} +123456789101112 \N {"A":123456789101112} +\N [123456] {"AA":[123456]} +\N [123456789101112] {"AA":[123456789101112]} + +-- !sql_5 -- +123456 \N {"A":123456} {"A":123456} +123456789101112 \N {"A":123456789101112} {"A":123456789101112} + +-- !sql_6 -- +\N \N +\N \N +"123" \N +\N 1 +\N 123 +1.10111 1800 +1.1111 17211 +\N 123456 +123 191191 +\N 123456789101112 + +-- !sql_7 -- +2 1 +1 123 +20 1800 +22 17211 +4 123456 +16 191191 +8 123456789101112 + +-- !sql_8 -- +\N 123 +\N 1 +\N 123456 +\N 123456789101112 +\N \N +\N \N + +-- !sql_11 -- +123 +123456 +123456789101112 +191191 +1800 +17211 + +-- !sql_12 -- +123 {"A":123} +123456 {"A":123456} +123456789101112 {"A":123456789101112} +191191 {"c":123,"A":191191,"a":123.0} +1800 {"c":[12345],"A":1800,"a":1.10111} +17211 {"c":111111,"A":17211,"a":1.1111} + +-- !sql_13 -- +\N 123 +\N 1 +\N 123456 +\N 123456789101112 + +-- !sql_14 -- +\N 123456 {"A":123456} + +-- !sql_18 -- +\N 123 {"A":123} \N +\N 1 {"A":1} \N +\N 123456 {"A":123456} \N +\N 123456789101112 {"A":123456789101112} \N +\N \N {"AA":[123456]} \N +\N \N {"AA":[123456789101112]} \N +123 191191 {"c":123,"A":191191,"a":123.0} \N +123 \N {"c":123456,"a":"123"} \N +1.10111 1800 {"c":[12345],"A":1800,"a":1.10111} \N +1.1111 17211 {"c":111111,"A":17211,"a":1.1111} \N + +-- !sql_19 -- +\N \N {"oamama":1.1} 1.1 + +-- !sql_20 -- +123456 + +-- !sql_21_1 -- +12 {"yyy":456,"xxx":123} + +-- !sql_21_2 -- +[123456] +[123456789101112] + +-- !sql -- 
+3 + +-- !sql_22 -- +123 +\N +\N + +-- !sql_23 -- +\N +[123] +\N + +-- !sql_24 -- +\N +\N +[123] + +-- !sql_25 -- +50000 55000.00000000863 6150000 + +-- !sql_26 -- +5000 + +-- !sql_29_1 -- +1 {"kxxxx":123} {"xxxxyyyy":123} +1 {"kyyyy":"123"} {"kxkxkxkx":[123]} + +-- !sql -- +6 + +-- !sql -- +4 + +-- !sql_29 -- +["123",123,[123]] +123 +123456 +[123,"123",1.11111] +[123,1.11,"123"] +[123,{"xx":1}] +[123,{"a":1}] +[{"a":1},123] + +-- !sql_30 -- +7.111 [123,{"xx":1}] {"b":{"c":456,"e":7.111}} 456 + +-- !sql_30 -- +{"a":1123} +{"a":11245,"c":{"c":456,"e":7.111},"b":[123,{"xx":1}]} +{"xxxx":"kaana","a":1234} +{"xxxx":"kaana","a":1234} +{"xxxx":"kaana","a":1234} +{"xxxx":"kaana","a":1234} +{"xxxx":"kaana","a":1234} +{"xxxx":"kaana","a":1234} +{"xxxx":"kaana","a":1234} +{"xxxx":"kaana","a":1234} + +-- !sql_31 -- +{"a":1123,"c":{"c":456,"e":7.111},"oooo":{"xxxx":{"xxx":123}},"b":[123,{"xx":1}]} +{"ddd":{"aaa":123,"mxmxm":[456,"789"]},"xxxx":"kaana","a":1234} +{"ddd":{"aaa":123,"mxmxm":[456,"789"]},"xxxx":"kaana","a":1234} +{"ddd":{"aaa":123,"mxmxm":[456,"789"]},"xxxx":"kaana","a":1234} +{"ddd":{"aaa":123,"mxmxm":[456,"789"]},"xxxx":"kaana","a":1234} +{"ddd":{"aaa":123,"mxmxm":[456,"789"]},"xxxx":"kaana","a":1234} +{"ddd":{"aaa":123,"mxmxm":[456,"789"]},"xxxx":"kaana","a":1234} +{"ddd":{"aaa":123,"mxmxm":[456,"789"]},"xxxx":"kaana","a":1234} +{"ddd":{"aaa":123,"mxmxm":[456,"789"]},"xxxx":"kaana","a":1234} +{"ddd":{"aaa":123,"mxmxm":[456,"789"]},"xxxx":"kaana","a":1234} + +-- !sql_32 -- +"2023-06-21 16:35:58.468 INFO [sino-webhook,6dee61f0605a70f3,6dee61f0605a70f3,true] 1 --- [io-8001-exec-13] c.s.c.a.CustomRequestBodyAdviceAdapter : REQUEST DATA: 
{String={\\"id\\":\\"3293880a36cd163754ea4f90270331f6\\",\\"token\\":\\"3293880a36cd163754ea4f90270331f6\\",\\"line_items\\":[{\\"id\\":43073082228985,\\"properties\\":{},\\"quantity\\":1000000,\\"variant_id\\":43073082228985,\\"key\\":\\"43073082228985:381f0b4b03d0c76493aa028c4ed006a9\\",\\"discounted_price\\":\\"28.95\\",\\"discounts\\":[],\\"gift_card\\":false,\\"grams\\":0,\\"line_price\\":\\"28950000.00\\",\\"original_line_price\\":\\"28950000.00\\",\\"original_price\\":\\"28.95\\",\\"price\\":\\"28.95\\",\\"product_id\\":7706860617977,\\"sku\\":\\"56011440_Shirt(A)||Brown||M9\\",\\"taxable\\":false,\\"title\\":\\"Men's Hawaiian Short Sleeve Shirt - Shirt(A) \\\\/ Brown \\\\/ M\\",\\"total_discount\\":\\"0.00\\",\\"vendor\\":\\"XT\\",\\"discounted_price_set\\":{\\"shop_money\\":{\\"amount\\":\\"28.95\\",\\"currency_code\\":\\"USD\\"},\\"presentment_money\\":{\\"amount\\":\\"28.95\\",\\"currency_code\\":\\"USD\\"}},\\"line_price_set\\":{\\"shop_money\\":{\\"amount\\":\\"28950000.0\\",\\"currency_code\\":\\"USD\\"},\\"presentment_money\\":{\\"amount\\":\\"28950000.0\\",\\"currency_code\\":\\"USD\\"}},\\"original_line_price_set\\":{\\"shop_money\\":{\\"amount\\":\\"28950000.0\\",\\"currency_code\\":\\"USD\\"},\\"presentment_money\\":{\\"amount\\":\\"28950000.0\\",\\"currency_code\\":\\"USD\\"}},\\"price_set\\":{\\"shop_money\\":{\\"amount\\":\\"28.95\\",\\"currency_code\\":\\"USD\\"},\\"presentment_money\\":{\\"amount\\":\\"28.95\\",\\"currency_code\\":\\"USD\\"}},\\"total_discount_set\\":{\\"shop_money\\":{\\"amount\\":\\"0.0\\",\\"currency_code\\":\\"USD\\"},\\"presentment_money\\":{\\"amount\\":\\"0.0\\",\\"currency_code\\":\\"USD\\"}}}],\\"note\\":null,\\"updated_at\\":\\"2023-06-21T08:35:56.674Z\\",\\"created_at\\":\\"2023-06-21T08:35:48.174Z\\"}}" + +-- !sql_32_1 -- +2023-06-21 16:35:58.468 INFO [sino-webhook,6dee61f0605a70f3,6dee61f0605a70f3,true] 1 --- [io-8001-exec-13] c.s.c.a.CustomRequestBodyAdviceAdapter : REQUEST DATA: 
{String={"id":"3293880a36cd163754ea4f90270331f6","token":"3293880a36cd163754ea4f90270331f6","line_items":[{"id":43073082228985,"properties":{},"quantity":1000000,"variant_id":43073082228985,"key":"43073082228985:381f0b4b03d0c76493aa028c4ed006a9","discounted_price":"28.95","discounts":[],"gift_card":false,"grams":0,"line_price":"28950000.00","original_line_price":"28950000.00","original_price":"28.95","price":"28.95","product_id":7706860617977,"sku":"56011440_Shirt(A)||Brown||M9","taxable":false,"title":"Men's Hawaiian Short Sleeve Shirt - Shirt(A) \\/ Brown \\/ M","total_discount":"0.00","vendor":"XT","discounted_price_set":{"shop_money":{"amount":"28.95","currency_code":"USD"},"presentment_money":{"amount":"28.95","currency_code":"USD"}},"line_price_set":{"shop_money":{"amount":"28950000.0","currency_code":"USD"},"presentment_money":{"amount":"28950000.0","currency_code":"USD"}},"original_line_price_set":{"shop_money":{"amount":"28950000.0","currency_code":"USD"},"presentment_money":{"amount":"28950000.0","currency_code":"USD"}},"price_set":{"shop_money":{"amount":"28.95","currency_code":"USD"},"presentment_money":{"amount":"28.95","currency_code":"USD"}},"total_discount_set":{"shop_money":{"amount":"0.0","currency_code":"USD"},"presentment_money":{"amount":"0.0","currency_code":"USD"}}}],"note":null,"updated_at":"2023-06-21T08:35:56.674Z","created_at":"2023-06-21T08:35:48.174Z"}} + +-- !sql_33 -- +"2023-06-21 16:35:58.468 INFO [sino-webhook,6dee61f0605a70f3,6dee61f0605a70f3,true] 1 --- [io-8001-exec-13] c.s.c.a.CustomRequestBodyAdviceAdapter : REQUEST DATA: 
{String={\\"id\\":\\"3293880a36cd163754ea4f90270331f6\\",\\"token\\":\\"3293880a36cd163754ea4f90270331f6\\",\\"line_items\\":[{\\"id\\":43073082228985,\\"properties\\":{},\\"quantity\\":1000000,\\"variant_id\\":43073082228985,\\"key\\":\\"43073082228985:381f0b4b03d0c76493aa028c4ed006a9\\",\\"discounted_price\\":\\"28.95\\",\\"discounts\\":[],\\"gift_card\\":false,\\"grams\\":0,\\"line_price\\":\\"28950000.00\\",\\"original_line_price\\":\\"28950000.00\\",\\"original_price\\":\\"28.95\\",\\"price\\":\\"28.95\\",\\"product_id\\":7706860617977,\\"sku\\":\\"56011440_Shirt(A)||Brown||M9\\",\\"taxable\\":false,\\"title\\":\\"Men's Hawaiian Short Sleeve Shirt - Shirt(A) \\\\/ Brown \\\\/ M\\",\\"total_discount\\":\\"0.00\\",\\"vendor\\":\\"XT\\",\\"discounted_price_set\\":{\\"shop_money\\":{\\"amount\\":\\"28.95\\",\\"currency_code\\":\\"USD\\"},\\"presentment_money\\":{\\"amount\\":\\"28.95\\",\\"currency_code\\":\\"USD\\"}},\\"line_price_set\\":{\\"shop_money\\":{\\"amount\\":\\"28950000.0\\",\\"currency_code\\":\\"USD\\"},\\"presentment_money\\":{\\"amount\\":\\"28950000.0\\",\\"currency_code\\":\\"USD\\"}},\\"original_line_price_set\\":{\\"shop_money\\":{\\"amount\\":\\"28950000.0\\",\\"currency_code\\":\\"USD\\"},\\"presentment_money\\":{\\"amount\\":\\"28950000.0\\",\\"currency_code\\":\\"USD\\"}},\\"price_set\\":{\\"shop_money\\":{\\"amount\\":\\"28.95\\",\\"currency_code\\":\\"USD\\"},\\"presentment_money\\":{\\"amount\\":\\"28.95\\",\\"currency_code\\":\\"USD\\"}},\\"total_discount_set\\":{\\"shop_money\\":{\\"amount\\":\\"0.0\\",\\"currency_code\\":\\"USD\\"},\\"presentment_money\\":{\\"amount\\":\\"0.0\\",\\"currency_code\\":\\"USD\\"}}}],\\"note\\":null,\\"updated_at\\":\\"2023-06-21T08:35:56.674Z\\",\\"created_at\\":\\"2023-06-21T08:35:48.174Z\\"}}" + +-- !sql_33_1 -- +"2023-06-21 16:35:58.468 INFO [sino-webhook,6dee61f0605a70f3,6dee61f0605a70f3,true] 1 --- [io-8001-exec-13] c.s.c.a.CustomRequestBodyAdviceAdapter : REQUEST DATA: 
{String={\\"id\\":\\"3293880a36cd163754ea4f90270331f6\\",\\"token\\":\\"3293880a36cd163754ea4f90270331f6\\",\\"line_items\\":[{\\"id\\":43073082228985,\\"properties\\":{},\\"quantity\\":1000000,\\"variant_id\\":43073082228985,\\"key\\":\\"43073082228985:381f0b4b03d0c76493aa028c4ed006a9\\",\\"discounted_price\\":\\"28.95\\",\\"discounts\\":[],\\"gift_card\\":false,\\"grams\\":0,\\"line_price\\":\\"28950000.00\\",\\"original_line_price\\":\\"28950000.00\\",\\"original_price\\":\\"28.95\\",\\"price\\":\\"28.95\\",\\"product_id\\":7706860617977,\\"sku\\":\\"56011440_Shirt(A)||Brown||M9\\",\\"taxable\\":false,\\"title\\":\\"Men's Hawaiian Short Sleeve Shirt - Shirt(A) \\\\/ Brown \\\\/ M\\",\\"total_discount\\":\\"0.00\\",\\"vendor\\":\\"XT\\",\\"discounted_price_set\\":{\\"shop_money\\":{\\"amount\\":\\"28.95\\",\\"currency_code\\":\\"USD\\"},\\"presentment_money\\":{\\"amount\\":\\"28.95\\",\\"currency_code\\":\\"USD\\"}},\\"line_price_set\\":{\\"shop_money\\":{\\"amount\\":\\"28950000.0\\",\\"currency_code\\":\\"USD\\"},\\"presentment_money\\":{\\"amount\\":\\"28950000.0\\",\\"currency_code\\":\\"USD\\"}},\\"original_line_price_set\\":{\\"shop_money\\":{\\"amount\\":\\"28950000.0\\",\\"currency_code\\":\\"USD\\"},\\"presentment_money\\":{\\"amount\\":\\"28950000.0\\",\\"currency_code\\":\\"USD\\"}},\\"price_set\\":{\\"shop_money\\":{\\"amount\\":\\"28.95\\",\\"currency_code\\":\\"USD\\"},\\"presentment_money\\":{\\"amount\\":\\"28.95\\",\\"currency_code\\":\\"USD\\"}},\\"total_discount_set\\":{\\"shop_money\\":{\\"amount\\":\\"0.0\\",\\"currency_code\\":\\"USD\\"},\\"presentment_money\\":{\\"amount\\":\\"0.0\\",\\"currency_code\\":\\"USD\\"}}}],\\"note\\":null,\\"updated_at\\":\\"2023-06-21T08:35:56.674Z\\",\\"created_at\\":\\"2023-06-21T08:35:48.174Z\\"}}" + +-- !sql_34 -- +"2023-06-21 16:35:58.468 INFO [sino-webhook,6dee61f0605a70f3,6dee61f0605a70f3,true] 1 --- [io-8001-exec-13] c.s.c.a.CustomRequestBodyAdviceAdapter : REQUEST DATA: 
{String={\\"id\\":\\"3293880a36cd163754ea4f90270331f6\\",\\"token\\":\\"3293880a36cd163754ea4f90270331f6\\",\\"line_items\\":[{\\"id\\":43073082228985,\\"properties\\":{},\\"quantity\\":1000000,\\"variant_id\\":43073082228985,\\"key\\":\\"43073082228985:381f0b4b03d0c76493aa028c4ed006a9\\",\\"discounted_price\\":\\"28.95\\",\\"discounts\\":[],\\"gift_card\\":false,\\"grams\\":0,\\"line_price\\":\\"28950000.00\\",\\"original_line_price\\":\\"28950000.00\\",\\"original_price\\":\\"28.95\\",\\"price\\":\\"28.95\\",\\"product_id\\":7706860617977,\\"sku\\":\\"56011440_Shirt(A)||Brown||M9\\",\\"taxable\\":false,\\"title\\":\\"Men's Hawaiian Short Sleeve Shirt - Shirt(A) \\\\/ Brown \\\\/ M\\",\\"total_discount\\":\\"0.00\\",\\"vendor\\":\\"XT\\",\\"discounted_price_set\\":{\\"shop_money\\":{\\"amount\\":\\"28.95\\",\\"currency_code\\":\\"USD\\"},\\"presentment_money\\":{\\"amount\\":\\"28.95\\",\\"currency_code\\":\\"USD\\"}},\\"line_price_set\\":{\\"shop_money\\":{\\"amount\\":\\"28950000.0\\",\\"currency_code\\":\\"USD\\"},\\"presentment_money\\":{\\"amount\\":\\"28950000.0\\",\\"currency_code\\":\\"USD\\"}},\\"original_line_price_set\\":{\\"shop_money\\":{\\"amount\\":\\"28950000.0\\",\\"currency_code\\":\\"USD\\"},\\"presentment_money\\":{\\"amount\\":\\"28950000.0\\",\\"currency_code\\":\\"USD\\"}},\\"price_set\\":{\\"shop_money\\":{\\"amount\\":\\"28.95\\",\\"currency_code\\":\\"USD\\"},\\"presentment_money\\":{\\"amount\\":\\"28.95\\",\\"currency_code\\":\\"USD\\"}},\\"total_discount_set\\":{\\"shop_money\\":{\\"amount\\":\\"0.0\\",\\"currency_code\\":\\"USD\\"},\\"presentment_money\\":{\\"amount\\":\\"0.0\\",\\"currency_code\\":\\"USD\\"}}}],\\"note\\":null,\\"updated_at\\":\\"2023-06-21T08:35:56.674Z\\",\\"created_at\\":\\"2023-06-21T08:35:48.174Z\\"}}" + +-- !sql_35 -- + +-- !sql_35_1 -- + +-- !sql_36_1 -- +1 \N \N +\N \N 1 +\N \N \N +\N \N \N +\N \N \N +\N \N \N +\N \N \N +\N \N \N +\N \N \N +\N \N \N + +-- !sql_36_2 -- +7702 
{"payload":{"commits":[{"sha":"348743fdce27d3f3c97e366381b1b7b371fc4510","author":{"email":"9b7a0973fc99779f7e1822eb7336ff5d28bd2653@users.noreply.github.com","name":"Łukasz Magiera"},"message":"Create README.md","distinct":true,"url":"https://api.github.com/repos/magik6k/BitBuffer/commits/348743fdce27d3f3c97e366381b1b7b371fc4510"}],"before":"4e150694dacd35e7d5cda4e9f6a2aedb1d35db36","head":"348743fdce27d3f3c97e366381b1b7b371fc4510","size":1,"push_id":536752118,"ref":"refs/heads/master","distinct_size":1},"created_at":"2015-01-01T00:59:58Z","id":"2489395761","public":1,"actor":{"gravatar_id":"","url":"https://api.github.com/users/magik6k","id":3867941,"login":"magik6k","avatar_url":"https://avatars.githubusercontent.com/u/3867941?"},"repo":{"url":"https://api.github.com/repos/magik6k/BitBuffer","id":28677864,"name":"magik6k/BitBuffer"},"type":"PushEvent"} +7701 {"payload":{"pages":[{"page_name":"Android Development Basics","title":"Android Development Basics","summary":null,"action":"edited","sha":"e4e947a4f29b1a06f560ac1e62bd3bf183e434b6","html_url":"https://github.com/wllmtrng/wllmtrng.github.io/wiki/Android-Development-Basics"}]},"created_at":"2015-01-01T00:59:58Z","id":"2489395760","public":1,"actor":{"gravatar_id":"","url":"https://api.github.com/users/wllmtrng","id":1335855,"login":"wllmtrng","avatar_url":"https://avatars.githubusercontent.com/u/1335855?"},"repo":{"url":"https://api.github.com/repos/wllmtrng/wllmtrng.github.io","id":18089434,"name":"wllmtrng/wllmtrng.github.io"},"type":"GollumEvent"} +7700 
{"payload":{"forkee":{"svn_url":"https://github.com/WangXYZ/TBC","pushed_at":"2014-12-24T18:26:11Z","issues_url":"https://api.github.com/repos/WangXYZ/TBC/issues{/number}","events_url":"https://api.github.com/repos/WangXYZ/TBC/events","labels_url":"https://api.github.com/repos/WangXYZ/TBC/labels{/name}","releases_url":"https://api.github.com/repos/WangXYZ/TBC/releases{/id}","keys_url":"https://api.github.com/repos/WangXYZ/TBC/keys{/key_id}","stargazers_url":"https://api.github.com/repos/WangXYZ/TBC/stargazers","has_downloads":1,"commits_url":"https://api.github.com/repos/WangXYZ/TBC/commits{/sha}","downloads_url":"https://api.github.com/repos/WangXYZ/TBC/downloads","default_branch":"master","open_issues":0,"size":9207,"forks_count":0,"id":28678211,"has_wiki":0,"owner":{"starred_url":"https://api.github.com/users/WangXYZ/starred{/owner}{/repo}","url":"https://api.github.com/users/WangXYZ","repos_url":"https://api.github.com/users/WangXYZ/repos","events_url":"https://api.github.com/users/WangXYZ/events{/privacy}","login":"WangXYZ","avatar_url":"https://avatars.githubusercontent.com/u/8252171?v=3","gravatar_id":"","site_admin":0,"html_url":"https://github.com/WangXYZ","received_events_url":"https://api.github.com/users/WangXYZ/received_events","followers_url":"https://api.github.com/users/WangXYZ/followers","following_url":"https://api.github.com/users/WangXYZ/following{/other_user}","gists_url":"https://api.github.com/users/WangXYZ/gists{/gist_id}","type":"Organization","subscriptions_url":"https://api.github.com/users/WangXYZ/subscriptions","organizations_url":"https://api.github.com/users/WangXYZ/orgs","id":8252171},"languages_url":"https://api.github.com/repos/WangXYZ/TBC/languages","git_tags_url":"https://api.github.com/repos/WangXYZ/TBC/git/tags{/sha}","archive_url":"https://api.github.com/repos/WangXYZ/TBC/{archive_format}{/ref}","git_refs_url":"https://api.github.com/repos/WangXYZ/TBC/git/refs{/sha}","trees_url":"https://api.github.com/repos/WangXYZ/TBC/git/tre
es{/sha}","updated_at":"2014-09-10T17:41:40Z","description":"ACID Scripts for CMaNGOS TBC","forks_url":"https://api.github.com/repos/WangXYZ/TBC/forks","hooks_url":"https://api.github.com/repos/WangXYZ/TBC/hooks","created_at":"2015-01-01T00:59:57Z","fork":1,"forks":0,"subscription_url":"https://api.github.com/repos/WangXYZ/TBC/subscription","compare_url":"https://api.github.com/repos/WangXYZ/TBC/compare/{base}...{head}","url":"https://api.github.com/repos/WangXYZ/TBC","collaborators_url":"https://api.github.com/repos/WangXYZ/TBC/collaborators{/collaborator}","statuses_url":"https://api.github.com/repos/WangXYZ/TBC/statuses/{sha}","comments_url":"https://api.github.com/repos/WangXYZ/TBC/comments{/number}","blobs_url":"https://api.github.com/repos/WangXYZ/TBC/git/blobs{/sha}","html_url":"https://github.com/WangXYZ/TBC","watchers_count":0,"has_issues":0,"has_pages":0,"contents_url":"https://api.github.com/repos/WangXYZ/TBC/contents/{+path}","issue_events_url":"https://api.github.com/repos/WangXYZ/TBC/issues/events{/number}","ssh_url":"git@github.com:WangXYZ/TBC.git","tags_url":"https://api.github.com/repos/WangXYZ/TBC/tags","name":"TBC","issue_comment_url":"https://api.github.com/repos/WangXYZ/TBC/issues/comments/{number}","git_url":"git://github.com/WangXYZ/TBC.git","subscribers_url":"https://api.github.com/repos/WangXYZ/TBC/subscribers","clone_url":"https://github.com/WangXYZ/TBC.git","notifications_url":"https://api.github.com/repos/WangXYZ/TBC/notifications{?since,all,participating}","full_name":"WangXYZ/TBC","private":0,"teams_url":"https://api.github.com/repos/WangXYZ/TBC/teams","milestones_url":"https://api.github.com/repos/WangXYZ/TBC/milestones{/number}","public":1,"git_commits_url":"https://api.github.com/repos/WangXYZ/TBC/git/commits{/sha}","open_issues_count":0,"watchers":0,"contributors_url":"https://api.github.com/repos/WangXYZ/TBC/contributors","branches_url":"https://api.github.com/repos/WangXYZ/TBC/branches{/branch}","stargazers_count":0,"pulls_url":"h
ttps://api.github.com/repos/WangXYZ/TBC/pulls{/number}","merges_url":"https://api.github.com/repos/WangXYZ/TBC/merges","assignees_url":"https://api.github.com/repos/WangXYZ/TBC/assignees{/user}"}},"created_at":"2015-01-01T00:59:57Z","id":"2489395755","org":{"gravatar_id":"","url":"https://api.github.com/orgs/ACID-Scripts","id":8674587,"login":"ACID-Scripts","avatar_url":"https://avatars.githubusercontent.com/u/8674587?"},"public":1,"actor":{"gravatar_id":"","url":"https://api.github.com/users/WangXYZ","id":8252171,"login":"WangXYZ","avatar_url":"https://avatars.githubusercontent.com/u/8252171?"},"repo":{"url":"https://api.github.com/repos/ACID-Scripts/TBC","id":23724137,"name":"ACID-Scripts/TBC"},"type":"ForkEvent"} +7699 {"payload":{"description":"Game using the MS Kinect that scans the user and creates a 3D version of them.","ref":"master","ref_type":"branch","pusher_type":"user","master_branch":"master"},"created_at":"2015-01-01T00:59:57Z","id":"2489395752","public":1,"actor":{"gravatar_id":"","url":"https://api.github.com/users/chrisjimenez","id":3580593,"login":"chrisjimenez","avatar_url":"https://avatars.githubusercontent.com/u/3580593?"},"repo":{"url":"https://api.github.com/repos/chrisjimenez/IpsePuppet","id":28678128,"name":"chrisjimenez/IpsePuppet"},"type":"CreateEvent"} +7698 {"payload":{"issue":{"url":"https://api.github.com/repos/antonioortegajr/beerfind.me/issues/12","created_at":"2015-01-01T00:59:56Z","body":"The bee that are being searched for currently should also have logos and descriptions from the 
API.","events_url":"https://api.github.com/repos/antonioortegajr/beerfind.me/issues/12/events","labels_url":"https://api.github.com/repos/antonioortegajr/beerfind.me/issues/12/labels{/name}","locked":0,"state":"open","comments_url":"https://api.github.com/repos/antonioortegajr/beerfind.me/issues/12/comments","user":{"starred_url":"https://api.github.com/users/antonioortegajr/starred{/owner}{/repo}","url":"https://api.github.com/users/antonioortegajr","repos_url":"https://api.github.com/users/antonioortegajr/repos","events_url":"https://api.github.com/users/antonioortegajr/events{/privacy}","login":"antonioortegajr","avatar_url":"https://avatars.githubusercontent.com/u/6744175?v=3","gravatar_id":"","site_admin":0,"html_url":"https://github.com/antonioortegajr","received_events_url":"https://api.github.com/users/antonioortegajr/received_events","followers_url":"https://api.github.com/users/antonioortegajr/followers","following_url":"https://api.github.com/users/antonioortegajr/following{/other_user}","gists_url":"https://api.github.com/users/antonioortegajr/gists{/gist_id}","type":"User","subscriptions_url":"https://api.github.com/users/antonioortegajr/subscriptions","organizations_url":"https://api.github.com/users/antonioortegajr/orgs","id":6744175},"title":"add logos and descriptions to beers being searched","id":53210166,"number":12,"comments":0,"updated_at":"2015-01-01T00:59:56Z","html_url":"https://github.com/antonioortegajr/beerfind.me/issues/12"},"action":"opened"},"created_at":"2015-01-01T00:59:56Z","id":"2489395749","public":1,"actor":{"gravatar_id":"","url":"https://api.github.com/users/antonioortegajr","id":6744175,"login":"antonioortegajr","avatar_url":"https://avatars.githubusercontent.com/u/6744175?"},"repo":{"url":"https://api.github.com/repos/antonioortegajr/beerfind.me","id":28573267,"name":"antonioortegajr/beerfind.me"},"type":"IssuesEvent"} +7697 
{"payload":{"action":"closed","pull_request":{"review_comments_url":"https://api.github.com/repos/XLabs/Xamarin-Forms-Labs/pulls/534/comments","url":"https://api.github.com/repos/XLabs/Xamarin-Forms-Labs/pulls/534","body":"","patch_url":"https://github.com/XLabs/Xamarin-Forms-Labs/pull/534.patch","head":{"ref":"master","user":{"starred_url":"https://api.github.com/users/bokmadsen/starred{/owner}{/repo}","url":"https://api.github.com/users/bokmadsen","repos_url":"https://api.github.com/users/bokmadsen/repos","events_url":"https://api.github.com/users/bokmadsen/events{/privacy}","login":"bokmadsen","avatar_url":"https://avatars.githubusercontent.com/u/790740?v=3","gravatar_id":"","site_admin":0,"html_url":"https://github.com/bokmadsen","received_events_url":"https://api.github.com/users/bokmadsen/received_events","followers_url":"https://api.github.com/users/bokmadsen/followers","following_url":"https://api.github.com/users/bokmadsen/following{/other_user}","gists_url":"https://api.github.com/users/bokmadsen/gists{/gist_id}","type":"User","subscriptions_url":"https://api.github.com/users/bokmadsen/subscriptions","organizations_url":"https://api.github.com/users/bokmadsen/orgs","id":790740},"sha":"2a664fd4ac7e8ff340893ad5e4c2f50127ee52c6","label":"bokmadsen:master","repo":{"svn_url":"https://github.com/bokmadsen/Xamarin-Forms-Labs","pushed_at":"2014-12-21T16:59:46Z","url":"https://api.github.com/repos/bokmadsen/Xamarin-Forms-Labs","issues_url":"https://api.github.com/repos/bokmadsen/Xamarin-Forms-Labs/issues{/number}","collaborators_url":"https://api.github.com/repos/bokmadsen/Xamarin-Forms-Labs/collaborators{/collaborator}","events_url":"https://api.github.com/repos/bokmadsen/Xamarin-Forms-Labs/events","has_downloads":1,"labels_url":"https://api.github.com/repos/bokmadsen/Xamarin-Forms-Labs/labels{/name}","keys_url":"https://api.github.com/repos/bokmadsen/Xamarin-Forms-Labs/keys{/key_id}","releases_url":"https://api.github.com/repos/bokmadsen/Xamarin-Forms-Labs/releas
es{/id}","stargazers_url":"https://api.github.com/repos/bokmadsen/Xamarin-Forms-Labs/stargazers","statuses_url":"https://api.github.com/repos/bokmadsen/Xamarin-Forms-Labs/statuses/{sha}","comments_url":"https://api.github.com/repos/bokmadsen/Xamarin-Forms-Labs/comments{/number}","blobs_url":"https://api.github.com/repos/bokmadsen/Xamarin-Forms-Labs/git/blobs{/sha}","html_url":"https://github.com/bokmadsen/Xamarin-Forms-Labs","has_pages":1,"commits_url":"https://api.github.com/repos/bokmadsen/Xamarin-Forms-Labs/commits{/sha}","downloads_url":"https://api.github.com/repos/bokmadsen/Xamarin-Forms-Labs/downloads","contents_url":"https://api.github.com/repos/bokmadsen/Xamarin-Forms-Labs/contents/{+path}","has_issues":0,"issue_events_url":"https://api.github.com/repos/bokmadsen/Xamarin-Forms-Labs/issues/events{/number}","ssh_url":"git@github.com:bokmadsen/Xamarin-Forms-Labs.git","watchers_count":0,"default_branch":"master","open_issues":0,"size":96455,"forks_count":0,"tags_url":"https://api.github.com/repos/bokmadsen/Xamarin-Forms-Labs/tags","id":28305437,"name":"Xamarin-Forms-Labs","has_wiki":1,"issue_comment_url":"https://api.github.com/repos/bokmadsen/Xamarin-Forms-Labs/issues/comments/{number}","owner":{"starred_url":"https://api.github.com/users/bokmadsen/starred{/owner}{/repo}","url":"https://api.github.com/users/bokmadsen","repos_url":"https://api.github.com/users/bokmadsen/repos","events_url":"https://api.github.com/users/bokmadsen/events{/privacy}","login":"bokmadsen","avatar_url":"https://avatars.githubusercontent.com/u/790740?v=3","gravatar_id":"","site_admin":0,"html_url":"https://github.com/bokmadsen","received_events_url":"https://api.github.com/users/bokmadsen/received_events","followers_url":"https://api.github.com/users/bokmadsen/followers","following_url":"https://api.github.com/users/bokmadsen/following{/other_user}","gists_url":"https://api.github.com/users/bokmadsen/gists{/gist_id}","type":"User","subscriptions_url":"https://api.github.com/users/bokma
dsen/subscriptions","organizations_url":"https://api.github.com/users/bokmadsen/orgs","id":790740},"languages_url":"https://api.github.com/repos/bokmadsen/Xamarin-Forms-Labs/languages","git_url":"git://github.com/bokmadsen/Xamarin-Forms-Labs.git","subscribers_url":"https://api.github.com/repos/bokmadsen/Xamarin-Forms-Labs/subscribers","clone_url":"https://github.com/bokmadsen/Xamarin-Forms-Labs.git","notifications_url":"https://api.github.com/repos/bokmadsen/Xamarin-Forms-Labs/notifications{?since,all,participating}","git_tags_url":"https://api.github.com/repos/bokmadsen/Xamarin-Forms-Labs/git/tags{/sha}","archive_url":"https://api.github.com/repos/bokmadsen/Xamarin-Forms-Labs/{archive_format}{/ref}","full_name":"bokmadsen/Xamarin-Forms-Labs","private":0,"teams_url":"https://api.github.com/repos/bokmadsen/Xamarin-Forms-Labs/teams","milestones_url":"https://api.github.com/repos/bokmadsen/Xamarin-Forms-Labs/milestones{/number}","git_refs_url":"https://api.github.com/repos/bokmadsen/Xamarin-Forms-Labs/git/refs{/sha}","git_commits_url":"https://api.github.com/repos/bokmadsen/Xamarin-Forms-Labs/git/commits{/sha}","trees_url":"https://api.github.com/repos/bokmadsen/Xamarin-Forms-Labs/git/trees{/sha}","updated_at":"2014-12-21T16:59:46Z","description":"Xamarin Forms Labs is a open source project that aims to provide a powerful and cross platform set of controls and helpers tailored to work with Xamarin 
Forms.","forks_url":"https://api.github.com/repos/bokmadsen/Xamarin-Forms-Labs/forks","open_issues_count":0,"hooks_url":"https://api.github.com/repos/bokmadsen/Xamarin-Forms-Labs/hooks","watchers":0,"created_at":"2014-12-21T16:32:52Z","contributors_url":"https://api.github.com/repos/bokmadsen/Xamarin-Forms-Labs/contributors","branches_url":"https://api.github.com/repos/bokmadsen/Xamarin-Forms-Labs/branches{/branch}","stargazers_count":0,"fork":1,"forks":0,"compare_url":"https://api.github.com/repos/bokmadsen/Xamarin-Forms-Labs/compare/{base}...{head}","pulls_url":"https://api.github.com/repos/bokmadsen/Xamarin-Forms-Labs/pulls{/number}","subscription_url":"https://api.github.com/repos/bokmadsen/Xamarin-Forms-Labs/subscription","merges_url":"https://api.github.com/repos/bokmadsen/Xamarin-Forms-Labs/merges","assignees_url":"https://api.github.com/repos/bokmadsen/Xamarin-Forms-Labs/assignees{/user}","language":"C#","homepage":""}},"issue_url":"https://api.github.com/repos/XLabs/Xamarin-Forms-Labs/issues/534","statuses_url":"https://api.github.com/repos/XLabs/Xamarin-Forms-Labs/statuses/2a664fd4ac7e8ff340893ad5e4c2f50127ee52c6","comments_url":"https://api.github.com/repos/XLabs/Xamarin-Forms-Labs/issues/534/comments","diff_url":"https://github.com/XLabs/Xamarin-Forms-Labs/pull/534.diff","_links":{"self":{"href":"https://api.github.com/repos/XLabs/Xamarin-Forms-Labs/pulls/534"},"commits":{"href":"https://api.github.com/repos/XLabs/Xamarin-Forms-Labs/pulls/534/commits"},"review_comments":{"href":"https://api.github.com/repos/XLabs/Xamarin-Forms-Labs/pulls/534/comments"},"statuses":{"href":"https://api.github.com/repos/XLabs/Xamarin-Forms-Labs/statuses/2a664fd4ac7e8ff340893ad5e4c2f50127ee52c6"},"comments":{"href":"https://api.github.com/repos/XLabs/Xamarin-Forms-Labs/issues/534/comments"},"html":{"href":"https://github.com/XLabs/Xamarin-Forms-Labs/pull/534"},"issue":{"href":"https://api.github.com/repos/XLabs/Xamarin-Forms-Labs/issues/534"},"review_comment":{"href":"https:
//api.github.com/repos/XLabs/Xamarin-Forms-Labs/pulls/comments/{number}"}},"title":"Added null check in HybridWebView","base":{"ref":"master","user":{"starred_url":"https://api.github.com/users/XLabs/starred{/owner}{/repo}","url":"https://api.github.com/users/XLabs","repos_url":"https://api.github.com/users/XLabs/repos","events_url":"https://api.github.com/users/XLabs/events{/privacy}","login":"XLabs","avatar_url":"https://avatars.githubusercontent.com/u/7787062?v=3","gravatar_id":"","site_admin":0,"html_url":"https://github.com/XLabs","received_events_url":"https://api.github.com/users/XLabs/received_events","followers_url":"https://api.github.com/users/XLabs/followers","following_url":"https://api.github.com/users/XLabs/following{/other_user}","gists_url":"https://api.github.com/users/XLabs/gists{/gist_id}","type":"Organization","subscriptions_url":"https://api.github.com/users/XLabs/subscriptions","organizations_url":"https://api.github.com/users/XLabs/orgs","id":7787062},"sha":"6906dd38ff69debcc304cb05b6877fae71747acd","label":"XLabs:master","repo":{"svn_url":"https://github.com/XLabs/Xamarin-Forms-Labs","pushed_at":"2015-01-01T00:59:12Z","url":"https://api.github.com/repos/XLabs/Xamarin-Forms-Labs","issues_url":"https://api.github.com/repos/XLabs/Xamarin-Forms-Labs/issues{/number}","collaborators_url":"https://api.github.com/repos/XLabs/Xamarin-Forms-Labs/collaborators{/collaborator}","events_url":"https://api.github.com/repos/XLabs/Xamarin-Forms-Labs/events","has_downloads":1,"labels_url":"https://api.github.com/repos/XLabs/Xamarin-Forms-Labs/labels{/name}","keys_url":"https://api.github.com/repos/XLabs/Xamarin-Forms-Labs/keys{/key_id}","releases_url":"https://api.github.com/repos/XLabs/Xamarin-Forms-Labs/releases{/id}","stargazers_url":"https://api.github.com/repos/XLabs/Xamarin-Forms-Labs/stargazers","statuses_url":"https://api.github.com/repos/XLabs/Xamarin-Forms-Labs/statuses/{sha}","comments_url":"https://api.github.com/repos/XLabs/Xamarin-Forms-Labs/comm
ents{/number}","blobs_url":"https://api.github.com/repos/XLabs/Xamarin-Forms-Labs/git/blobs{/sha}","html_url":"https://github.com/XLabs/Xamarin-Forms-Labs","has_pages":1,"commits_url":"https://api.github.com/repos/XLabs/Xamarin-Forms-Labs/commits{/sha}","downloads_url":"https://api.github.com/repos/XLabs/Xamarin-Forms-Labs/downloads","contents_url":"https://api.github.com/repos/XLabs/Xamarin-Forms-Labs/contents/{+path}","has_issues":1,"issue_events_url":"https://api.github.com/repos/XLabs/Xamarin-Forms-Labs/issues/events{/number}","ssh_url":"git@github.com:XLabs/Xamarin-Forms-Labs.git","watchers_count":340,"default_branch":"master","open_issues":92,"size":104805,"forks_count":210,"tags_url":"https://api.github.com/repos/XLabs/Xamarin-Forms-Labs/tags","id":20463939,"name":"Xamarin-Forms-Labs","has_wiki":1,"issue_comment_url":"https://api.github.com/repos/XLabs/Xamarin-Forms-Labs/issues/comments/{number}","owner":{"starred_url":"https://api.github.com/users/XLabs/starred{/owner}{/repo}","url":"https://api.github.com/users/XLabs","repos_url":"https://api.github.com/users/XLabs/repos","events_url":"https://api.github.com/users/XLabs/events{/privacy}","login":"XLabs","avatar_url":"https://avatars.githubusercontent.com/u/7787062?v=3","gravatar_id":"","site_admin":0,"html_url":"https://github.com/XLabs","received_events_url":"https://api.github.com/users/XLabs/received_events","followers_url":"https://api.github.com/users/XLabs/followers","following_url":"https://api.github.com/users/XLabs/following{/other_user}","gists_url":"https://api.github.com/users/XLabs/gists{/gist_id}","type":"Organization","subscriptions_url":"https://api.github.com/users/XLabs/subscriptions","organizations_url":"https://api.github.com/users/XLabs/orgs","id":7787062},"languages_url":"https://api.github.com/repos/XLabs/Xamarin-Forms-Labs/languages","git_url":"git://github.com/XLabs/Xamarin-Forms-Labs.git","subscribers_url":"https://api.github.com/repos/XLabs/Xamarin-Forms-Labs/subscribers","clone_u
rl":"https://github.com/XLabs/Xamarin-Forms-Labs.git","notifications_url":"https://api.github.com/repos/XLabs/Xamarin-Forms-Labs/notifications{?since,all,participating}","git_tags_url":"https://api.github.com/repos/XLabs/Xamarin-Forms-Labs/git/tags{/sha}","archive_url":"https://api.github.com/repos/XLabs/Xamarin-Forms-Labs/{archive_format}{/ref}","full_name":"XLabs/Xamarin-Forms-Labs","private":0,"teams_url":"https://api.github.com/repos/XLabs/Xamarin-Forms-Labs/teams","milestones_url":"https://api.github.com/repos/XLabs/Xamarin-Forms-Labs/milestones{/number}","git_refs_url":"https://api.github.com/repos/XLabs/Xamarin-Forms-Labs/git/refs{/sha}","git_commits_url":"https://api.github.com/repos/XLabs/Xamarin-Forms-Labs/git/commits{/sha}","trees_url":"https://api.github.com/repos/XLabs/Xamarin-Forms-Labs/git/trees{/sha}","updated_at":"2015-01-01T00:59:13Z","description":"Xamarin Forms Labs is a open source project that aims to provide a powerful and cross platform set of controls and helpers tailored to work with Xamarin 
Forms.","forks_url":"https://api.github.com/repos/XLabs/Xamarin-Forms-Labs/forks","open_issues_count":92,"hooks_url":"https://api.github.com/repos/XLabs/Xamarin-Forms-Labs/hooks","watchers":340,"created_at":"2014-06-03T23:53:11Z","contributors_url":"https://api.github.com/repos/XLabs/Xamarin-Forms-Labs/contributors","branches_url":"https://api.github.com/repos/XLabs/Xamarin-Forms-Labs/branches{/branch}","stargazers_count":340,"fork":0,"forks":210,"compare_url":"https://api.github.com/repos/XLabs/Xamarin-Forms-Labs/compare/{base}...{head}","pulls_url":"https://api.github.com/repos/XLabs/Xamarin-Forms-Labs/pulls{/number}","subscription_url":"https://api.github.com/repos/XLabs/Xamarin-Forms-Labs/subscription","merges_url":"https://api.github.com/repos/XLabs/Xamarin-Forms-Labs/merges","assignees_url":"https://api.github.com/repos/XLabs/Xamarin-Forms-Labs/assignees{/user}","language":"C#","homepage":""}},"updated_at":"2015-01-01T00:59:55Z","html_url":"https://github.com/XLabs/Xamarin-Forms-Labs/pull/534","commits_url":"https://api.github.com/repos/XLabs/Xamarin-Forms-Labs/pulls/534/commits","created_at":"2014-12-21T17:00:20Z","locked":0,"state":"closed","user":{"starred_url":"https://api.github.com/users/bokmadsen/starred{/owner}{/repo}","url":"https://api.github.com/users/bokmadsen","repos_url":"https://api.github.com/users/bokmadsen/repos","events_url":"https://api.github.com/users/bokmadsen/events{/privacy}","login":"bokmadsen","avatar_url":"https://avatars.githubusercontent.com/u/790740?v=3","gravatar_id":"","site_admin":0,"html_url":"https://github.com/bokmadsen","received_events_url":"https://api.github.com/users/bokmadsen/received_events","followers_url":"https://api.github.com/users/bokmadsen/followers","following_url":"https://api.github.com/users/bokmadsen/following{/other_user}","gists_url":"https://api.github.com/users/bokmadsen/gists{/gist_id}","type":"User","subscriptions_url":"https://api.github.com/users/bokmadsen/subscriptions","organizations_url":"https
://api.github.com/users/bokmadsen/orgs","id":790740},"id":26429779,"number":534,"review_comment_url":"https://api.github.com/repos/XLabs/Xamarin-Forms-Labs/pulls/comments/{number}","additions":16,"changed_files":1,"mergeable_state":"clean","review_comments":0,"deletions":14,"merged":0,"merge_commit_sha":"9cda5a97584b1ad549fff2dcaaf304adeb27ae07","commits":1,"closed_at":"2015-01-01T00:59:55Z","mergeable":1,"comments":0},"number":534},"created_at":"2015-01-01T00:59:55Z","id":"2489395745","org":{"gravatar_id":"","url":"https://api.github.com/orgs/XLabs","id":7787062,"login":"XLabs","avatar_url":"https://avatars.githubusercontent.com/u/7787062?"},"public":1,"actor":{"gravatar_id":"","url":"https://api.github.com/users/rmarinho","id":1235097,"login":"rmarinho","avatar_url":"https://avatars.githubusercontent.com/u/1235097?"},"repo":{"url":"https://api.github.com/repos/XLabs/Xamarin-Forms-Labs","id":20463939,"name":"XLabs/Xamarin-Forms-Labs"},"type":"PullRequestEvent"} +7696 {"payload":{"commits":[{"sha":"c356eac8fa4409a1aa794ab07244250a862da03b","author":{"email":"de8898f6c55e335aa0a2b937fae65fb756ee038f@gmail.com","name":"Zaryafaraj"},"message":"reserve modal styles","distinct":true,"url":"https://api.github.com/repos/Fathalian/Guild/commits/c356eac8fa4409a1aa794ab07244250a862da03b"},{"sha":"9a773fc648910c7a2499401f44a6e5f71eb30460","author":{"email":"de8898f6c55e335aa0a2b937fae65fb756ee038f@gmail.com","name":"Zaryafaraj"},"message":"Merge branch 'master' of 
https://github.com/Fathalian/Guild\\n\\nConflicts:\\n\\tapp/templates/reserveModal.html","distinct":true,"url":"https://api.github.com/repos/Fathalian/Guild/commits/9a773fc648910c7a2499401f44a6e5f71eb30460"}],"before":"da6d28eba11c89836e132bbba032c22d92e4f233","head":"9a773fc648910c7a2499401f44a6e5f71eb30460","size":2,"push_id":536752114,"ref":"refs/heads/master","distinct_size":2},"created_at":"2015-01-01T00:59:55Z","id":"2489395744","public":1,"actor":{"gravatar_id":"","url":"https://api.github.com/users/Zaryafaraj","id":1356088,"login":"Zaryafaraj","avatar_url":"https://avatars.githubusercontent.com/u/1356088?"},"repo":{"url":"https://api.github.com/repos/Fathalian/Guild","id":26995510,"name":"Fathalian/Guild"},"type":"PushEvent"} +7695 {"payload":{"commits":[{"sha":"17cf9ea07662d74b2d5bac1cf976f5853a63920d","author":{"email":"af59d1d6805404937849f05dafd5a911888fd7a4@gmail.com","name":"Kevin Hofmaenner"},"message":"minor UI tweak","distinct":true,"url":"https://api.github.com/repos/kevinhofmaenner/blackjack/commits/17cf9ea07662d74b2d5bac1cf976f5853a63920d"}],"before":"6b4f5af0a2a70bbe00cd88281823b436d506ff2d","head":"17cf9ea07662d74b2d5bac1cf976f5853a63920d","size":1,"push_id":536752112,"ref":"refs/heads/master","distinct_size":1},"created_at":"2015-01-01T00:59:55Z","id":"2489395742","public":1,"actor":{"gravatar_id":"","url":"https://api.github.com/users/kevinhofmaenner","id":10161858,"login":"kevinhofmaenner","avatar_url":"https://avatars.githubusercontent.com/u/10161858?"},"repo":{"url":"https://api.github.com/repos/kevinhofmaenner/blackjack","id":28652857,"name":"kevinhofmaenner/blackjack"},"type":"PushEvent"} +7694 {"payload":{"commits":[{"sha":"aa8ec0de017c8003758776739facc819e33ac7c9","author":{"email":"e0e04a2320844b42511db0376599e166ab5bda54@gmail.com","name":"Runhang Li"},"message":"finish all hamms 
test","distinct":true,"url":"https://api.github.com/repos/marklrh/ocaml-cohttp-test/commits/aa8ec0de017c8003758776739facc819e33ac7c9"}],"before":"2bb795fc30fc15ab85bcc10f894bfcfa118d69bc","head":"aa8ec0de017c8003758776739facc819e33ac7c9","size":1,"push_id":536752109,"ref":"refs/heads/master","distinct_size":1},"created_at":"2015-01-01T00:59:53Z","id":"2489395735","public":1,"actor":{"gravatar_id":"","url":"https://api.github.com/users/marklrh","id":3656079,"login":"marklrh","avatar_url":"https://avatars.githubusercontent.com/u/3656079?"},"repo":{"url":"https://api.github.com/repos/marklrh/ocaml-cohttp-test","id":27470715,"name":"marklrh/ocaml-cohttp-test"},"type":"PushEvent"} +7693 {"payload":{"commits":[{"sha":"0b27989723feb4b183d5f87813fef146b670b1d1","author":{"email":"7c5a0c567b5584a13fde407456875318a5bec977@gmail.com","name":"Raphaël Benitte"},"message":"Add time clock widget + Improve stylus theming","distinct":true,"url":"https://api.github.com/repos/plouc/mozaik/commits/0b27989723feb4b183d5f87813fef146b670b1d1"}],"before":"7ddf17eb74fff5adad6e2feb72bcb627d4644800","head":"0b27989723feb4b183d5f87813fef146b670b1d1","size":1,"push_id":536752104,"ref":"refs/heads/master","distinct_size":1},"created_at":"2015-01-01T00:59:51Z","id":"2489395728","public":1,"actor":{"gravatar_id":"","url":"https://api.github.com/users/plouc","id":501642,"login":"plouc","avatar_url":"https://avatars.githubusercontent.com/u/501642?"},"repo":{"url":"https://api.github.com/repos/plouc/mozaik","id":28498113,"name":"plouc/mozaik"},"type":"PushEvent"} + +-- !sql_36_3 -- +2 {"updated_value":10} + +-- !sql_37 -- +1 {"a":""} +1 {"a":"1"} +1 {"a":1} +1 {"a":1} + +-- !sql_38 -- +3 abd {"d":1} + +-- !sql_31 -- +kaana + diff --git a/regression-test/data/variant_p0/multi_var.out b/regression-test/data/variant_p0/multi_var.out new file mode 100644 index 0000000000..18e31a4a5a --- /dev/null +++ b/regression-test/data/variant_p0/multi_var.out @@ -0,0 +1,37 @@ +-- This file is automatically 
generated. You should know what you did if you want to edit this +-- !sql -- +1 \N \N \N \N \N +1 hello world \N \N \N \N +1 hello world [1234] \N \N \N +1 hello world [1234] \N \N \N +1 hello world [1234] \N \N \N +1 hello world [1234] \N \N \N +1 hello world [1234] \N \N \N +1 hello world [1234] \N \N \N +1 \N \N \N \N \N +1 hello world \N \N \N \N + +-- !sql -- +\N \N \N 123 \N \N +\N \N \N 123 \N \N +\N \N \N 123 elden ring \N +\N \N \N 123 elden ring \N +\N \N \N 123 elden ring 1.1112 +\N \N \N 123 elden ring 1.1112 +\N \N \N 123 elden ring 1.1112 +\N \N \N 123 elden ring 1.1112 +\N \N \N 123 elden ring 1.1112 +\N \N \N 123 elden ring 1.1112 + +-- !sql -- +\N \N \N 123 \N \N +\N \N \N 123 elden ring \N +\N \N \N 123 elden ring 1.1112 +\N \N \N 123 elden ring 1.1112 +\N \N \N 123 elden ring 1.1112 +\N \N \N 123 elden ring 1.1112 +\N \N \N 123 elden ring 1.1112 +\N \N \N 123 elden ring 1.1112 +\N \N \N 123 \N \N +\N \N \N 123 elden ring \N + diff --git a/regression-test/data/variant_p0/schema_change.out b/regression-test/data/variant_p0/schema_change.out new file mode 100644 index 0000000000..0c7852ac72 --- /dev/null +++ b/regression-test/data/variant_p0/schema_change.out @@ -0,0 +1,49 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !sql -- +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 + +-- !sql -- +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 + +-- !sql -- +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 + +-- !sql -- +1 hello world +1 hello world +1 hello world +1 hello world +1 hello world +1 hello world +1 hello world +1 hello world +1 hello world +1 hello world + diff --git a/regression-test/data/variant_p0/sql/gh_data.out b/regression-test/data/variant_p0/sql/gh_data.out new file mode 100644 index 0000000000..7fc5e10dbe --- /dev/null +++ b/regression-test/data/variant_p0/sql/gh_data.out @@ -0,0 +1,65 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !gh_data -- +0 + +-- !gh_data_2 -- +5000 + +-- !gh_data_3 -- +leonardomso/33-js-concepts 3 +ytdl-org/youtube-dl 3 +Bogdanp/neko 2 +bminossi/AllVideoPocsFromHackerOne 2 +disclose/diodata 2 + +-- !gh_data_4 -- +14690758274 + +-- !gh_data_5 -- +73453762334584 + +-- !gh_data_6 -- +457806339 + +-- !gh_data_7 -- +0 + +-- !gh_data_8 -- +19829 + +-- !gh_data_9 -- +49390617 +64890096 +10696700 +33066637 +32271952 +2051941 +32271952 +57325392 +42386044 +73801003 + +-- !gh_data_10 -- +27 {"payload":{"action":"started"},"created_at":"2021-01-02T16:37:27Z","id":"14690746717","public":1,"actor":{"gravatar_id":"","display_login":"sergdudnik","url":"https://api.github.com/users/sergdudnik","id":16341546,"login":"sergdudnik","avatar_url":"https://avatars.githubusercontent.com/u/16341546?"},"repo":{"url":"https://api.github.com/repos/leonardomso/33-js-concepts","id":147350463,"name":"leonardomso/33-js-concepts"},"type":"WatchEvent"} +36 {"payload":{"action":"started"},"created_at":"2021-01-02T16:37:27Z","id":"14690746732","public":1,"actor":{"gravatar_id":"","display_login":"juliusHuelsmann","url":"https://api.github.com/users/juliusHuelsmann","id":9212314,"login":"juliusHuelsmann","avatar_url":"https://avatars.githubusercontent.com/u/9212314?"},"repo":{"url":"https://api.github.com/repos/odeke-em/drive","id":26109545,"name":"odeke-em/drive"},"type":"WatchEvent"} +46 
{"payload":{"action":"started"},"created_at":"2021-01-02T16:37:27Z","id":"14690746749","org":{"gravatar_id":"","url":"https://api.github.com/orgs/GO-LiFE","id":38434522,"login":"GO-LiFE","avatar_url":"https://avatars.githubusercontent.com/u/38434522?"},"public":1,"actor":{"gravatar_id":"","display_login":"okbean","url":"https://api.github.com/users/okbean","id":75969386,"login":"okbean","avatar_url":"https://avatars.githubusercontent.com/u/75969386?"},"repo":{"url":"https://api.github.com/repos/GO-LiFE/GoFIT_SDK_Android","id":141905736,"name":"GO-LiFE/GoFIT_SDK_Android"},"type":"WatchEvent"} +56 {"payload":{"action":"started"},"created_at":"2021-01-02T16:37:28Z","id":"14690746773","public":1,"actor":{"gravatar_id":"","display_login":"PWDream","url":"https://api.github.com/users/PWDream","id":4903755,"login":"PWDream","avatar_url":"https://avatars.githubusercontent.com/u/4903755?"},"repo":{"url":"https://api.github.com/repos/MrXujiang/h5-Dooring","id":289417971,"name":"MrXujiang/h5-Dooring"},"type":"WatchEvent"} +86 {"payload":{"action":"started"},"created_at":"2021-01-02T16:37:29Z","id":"14690746843","public":1,"actor":{"gravatar_id":"","display_login":"Gui-Yom","url":"https://api.github.com/users/Gui-Yom","id":25181283,"login":"Gui-Yom","avatar_url":"https://avatars.githubusercontent.com/u/25181283?"},"repo":{"url":"https://api.github.com/repos/redsaph/cleartext","id":106453399,"name":"redsaph/cleartext"},"type":"WatchEvent"} +98 
{"payload":{"action":"started"},"created_at":"2021-01-02T16:37:29Z","id":"14690746866","org":{"gravatar_id":"","url":"https://api.github.com/orgs/sherlock-project","id":48293496,"login":"sherlock-project","avatar_url":"https://avatars.githubusercontent.com/u/48293496?"},"public":1,"actor":{"gravatar_id":"","display_login":"humaidk2","url":"https://api.github.com/users/humaidk2","id":12982026,"login":"humaidk2","avatar_url":"https://avatars.githubusercontent.com/u/12982026?"},"repo":{"url":"https://api.github.com/repos/sherlock-project/sherlock","id":162998479,"name":"sherlock-project/sherlock"},"type":"WatchEvent"} +101 {"payload":{"action":"started"},"created_at":"2021-01-02T16:37:29Z","id":"14690746870","public":1,"actor":{"gravatar_id":"","display_login":"hasantezcan","url":"https://api.github.com/users/hasantezcan","id":32804505,"login":"hasantezcan","avatar_url":"https://avatars.githubusercontent.com/u/32804505?"},"repo":{"url":"https://api.github.com/repos/okandavut/react-spotify-nowplaying","id":326215605,"name":"okandavut/react-spotify-nowplaying"},"type":"WatchEvent"} +112 {"payload":{"action":"started"},"created_at":"2021-01-02T16:37:30Z","id":"14690746899","public":1,"actor":{"gravatar_id":"","display_login":"nicholas-robertson","url":"https://api.github.com/users/nicholas-robertson","id":17681331,"login":"nicholas-robertson","avatar_url":"https://avatars.githubusercontent.com/u/17681331?"},"repo":{"url":"https://api.github.com/repos/sentriz/gonic","id":178435468,"name":"sentriz/gonic"},"type":"WatchEvent"} +122 
{"payload":{"action":"started"},"created_at":"2021-01-02T16:37:30Z","id":"14690746914","org":{"gravatar_id":"","url":"https://api.github.com/orgs/netlify-labs","id":47546088,"login":"netlify-labs","avatar_url":"https://avatars.githubusercontent.com/u/47546088?"},"public":1,"actor":{"gravatar_id":"","display_login":"javaniecampbell","url":"https://api.github.com/users/javaniecampbell","id":1676496,"login":"javaniecampbell","avatar_url":"https://avatars.githubusercontent.com/u/1676496?"},"repo":{"url":"https://api.github.com/repos/netlify-labs/react-netlify-identity-widget","id":182606378,"name":"netlify-labs/react-netlify-identity-widget"},"type":"WatchEvent"} +169 {"payload":{"action":"started"},"created_at":"2021-01-02T16:37:32Z","id":"14690747028","org":{"gravatar_id":"","url":"https://api.github.com/orgs/microsoft","id":6154722,"login":"microsoft","avatar_url":"https://avatars.githubusercontent.com/u/6154722?"},"public":1,"actor":{"gravatar_id":"","display_login":"Yxnt","url":"https://api.github.com/users/Yxnt","id":10323352,"login":"Yxnt","avatar_url":"https://avatars.githubusercontent.com/u/10323352?"},"repo":{"url":"https://api.github.com/repos/microsoft/BotBuilder-Samples","id":68730444,"name":"microsoft/BotBuilder-Samples"},"type":"WatchEvent"} + +-- !gh_data_11 -- +2051941 1 +10696700 1 +32271952 2 +33066637 1 +42386044 1 +49390617 1 +57325392 1 +59654005 1 +64890096 1 +73801003 1 + diff --git a/regression-test/data/variant_p0/with_index/load.out b/regression-test/data/variant_p0/with_index/load.out new file mode 100644 index 0000000000..61ffbfbe2e --- /dev/null +++ b/regression-test/data/variant_p0/with_index/load.out @@ -0,0 +1,28 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !sql_inv_1 -- +0 +123 + +-- !sql_inv_2 -- +123 + +-- !sql_inv_3 -- +3 {"a":123} hello world + +-- !sql_inv4 -- +0 + +-- !sql_inv5 -- +1 {"b1":3,"a1":0} hello world +2 {"a2":123} world +3 {"a3":123} hello world +4 {"b2":3,"b1":0} hello world +5 {"b2":123} world +6 {"b3":123} hello world + +-- !sql_inv6 -- +9 {"a3":123} hello world +8 {"a2":123} world +7 {"b1":3,"a1":0} hello world +6 {"b3":123} hello world + diff --git a/regression-test/data/variant_p0/with_index/var_index.out b/regression-test/data/variant_p0/with_index/var_index.out new file mode 100644 index 0000000000..a1fa5c0444 --- /dev/null +++ b/regression-test/data/variant_p0/with_index/var_index.out @@ -0,0 +1,10 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !sql -- +2 {"c":1181111,"a":18811,"b":"hello world"} +3 {"c":11111,"a":18811,"b":"hello wworld"} +4 {"c":8181111,"a":1234,"b":"hello xxx world"} + +-- !sql -- +2 {"c":1181111,"a":18811,"b":"hello world"} +4 {"c":8181111,"a":1234,"b":"hello xxx world"} + diff --git a/regression-test/data/variant_p2/load.out b/regression-test/data/variant_p2/load.out new file mode 100644 index 0000000000..86df6f121e --- /dev/null +++ b/regression-test/data/variant_p2/load.out @@ -0,0 +1,4 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !sql -- +44273863 + diff --git a/regression-test/data/variant_p2/sql/authorsWithTheMostPushes.out b/regression-test/data/variant_p2/sql/authorsWithTheMostPushes.out new file mode 100644 index 0000000000..569a84d202 --- /dev/null +++ b/regression-test/data/variant_p2/sql/authorsWithTheMostPushes.out @@ -0,0 +1,53 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !authorsWithTheMostPushes -- +KenanSulayman 150218 31 +greatfirebot 102604 4 +mirror-updates 84117 42 +greatfire 45199 2 +rydnr 43285 18 +pbaffiliate1 41473 6 +chapuni 35820 18 +asfgit 34623 391 +meatballhat 31119 57 +direwolf-github 28690 1 +openstack-gerrit 28358 473 +diversify-exp-user 25708 2 +dsm-git 25470 73 +wmst 23587 1 +qdm 21165 1 +cn-nytimes 20415 1 +wmfgerrit 19150 761 +maomihz 18291 2 +lukeis 17764 11 +cato- 16394 8 +efa2000 16010 6 +hubot 15856 306 +pluginmirror-worker 15692 3283 +k-okada 13944 86 +pbaffiliatetwoop 13892 3 +timmmmyboy 12912 11 +gnomesysadmins 12742 383 +micahyoung 12368 5 +InternetDevels 12344 3 +bmorganatlas 12018 2 +keum 11906 11 +eotect 11367 7 +kinlane 11248 67 +alma-dev 11224 1369 +hwine 11116 16 +alexcrichton 10976 85 +abeta 9648 9 +brucemcpherson 9277 95 +xinwendashibaike 8915 5 +cmsbuild 8713 4 +kodekloud 7819 30 +uqs 7748 3 +malevolm 7578 3 +unicoremachina 7550 152 +linphone-sync-bot 7542 22 +angelventura 7392 6 +xndcn 7226 9 +designerwebhosting 7131 8 +swegener 7076 2 +CocoaPodsBot 6650 3 + diff --git a/regression-test/data/variant_p2/sql/countingStar1.out b/regression-test/data/variant_p2/sql/countingStar1.out new file mode 100644 index 0000000000..f60d8e6483 --- /dev/null +++ b/regression-test/data/variant_p2/sql/countingStar1.out @@ -0,0 +1,4 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !countingStar1 -- +3951085 + diff --git a/regression-test/data/variant_p2/sql/countingStar2.out b/regression-test/data/variant_p2/sql/countingStar2.out new file mode 100644 index 0000000000..8c140a9300 --- /dev/null +++ b/regression-test/data/variant_p2/sql/countingStar2.out @@ -0,0 +1,4 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !countingStar2 -- +started 3951085 + diff --git a/regression-test/data/variant_p2/sql/countingStar3.out b/regression-test/data/variant_p2/sql/countingStar3.out new file mode 100644 index 0000000000..d447b2edd8 --- /dev/null +++ b/regression-test/data/variant_p2/sql/countingStar3.out @@ -0,0 +1,4 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !countingStar3 -- +4925 + diff --git a/regression-test/data/variant_p2/sql/distributionOfRepositoriesByStarCount.out b/regression-test/data/variant_p2/sql/distributionOfRepositoriesByStarCount.out new file mode 100644 index 0000000000..6a067f9a3a --- /dev/null +++ b/regression-test/data/variant_p2/sql/distributionOfRepositoriesByStarCount.out @@ -0,0 +1,8 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !distributionOfRepositoriesByStarCount -- +1 523331 +10 42165 +100 5258 +1000 270 +10000 1 + diff --git a/regression-test/data/variant_p2/sql/githubRoulette.out b/regression-test/data/variant_p2/sql/githubRoulette.out new file mode 100644 index 0000000000..0d0bb06898 --- /dev/null +++ b/regression-test/data/variant_p2/sql/githubRoulette.out @@ -0,0 +1,53 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !githubRoulette -- +SirVer/ultisnips +jessesquires/JSQMessagesViewController +DanielGorlo/ISIS.js +idris-lang/idris-tutorial +xiaobozi/youku-lixian +sorentwo/readthis +tomakehurst/wiremock +idris-lang/Idris-dev +angular/angular-seed +erming/shout +substack/geodetic-to-ecef +msgpack/msgpack-java +ropensci/webservices +Learn-Dev/Learn-Dev-Theme---Dashboard-partie-1 +greggman/webgl-fundamentals +suffick/Tearable-Cloth +mlemerre/l-lang +glfw/glfw +simplegeo/python-geohash +chrislusf/weed-fs +eviltrout/ember-renderspeed +ejurgensen/forked-daapd +idris-hackers/idris-vim +evanbrooks/syntax-highlight +jpsarda/Pixel-based-destructible-ground-with-Cocos2d-iPhone +tyler/trie +ilirb/ahk-scripts +adafruit/Adafruit_SSD1306 +basecamp/bcx-api +codegangsta/cli +dockerboard/dockerboard +csmith-project/csmith +josh/cafe-js +docker/fig +auchenberg/chrome-devtools-app +kenjiSpecial/100day-canvas-bootcamp-training +SchemaPlus/schema_plus +kaimu/ionic-vs2013-intellisense +SignalR/SignalR +sunng87/node-geohash +joyent/node +codelucas/newspaper +prakhar1989/awesome-courses +coreos/etcd +cssdream/cssgrace +spf13/hugo +maxwellito/vivus +WhisperSystems/RedPhone +omz/AppSales-Mobile +casatt/html5-videoEditor + diff --git a/regression-test/data/variant_p2/sql/howHasTheListOfTopRepositoriesChangedOverTheYears1.out b/regression-test/data/variant_p2/sql/howHasTheListOfTopRepositoriesChangedOverTheYears1.out new file mode 100644 index 0000000000..b6b264b11f --- /dev/null +++ b/regression-test/data/variant_p2/sql/howHasTheListOfTopRepositoriesChangedOverTheYears1.out @@ -0,0 +1,53 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !howHasTheListOfTopRepositoriesChangedOverTheYears1 -- +facebook/react-native 10021 +phanan/htaccess 7964 +alex/what-happens-when 7407 +facebook/react 6762 +moklick/frontend-stuff 6528 +prakhar1989/awesome-courses 6110 +yaronn/blessed-contrib 5880 +arasatasaygin/is.js 5498 +vhf/free-programming-books 5437 +tiimgreen/github-cheat-sheet 5420 +gorhill/uBlock 5415 +Flipboard/react-canvas 5187 +h5bp/Front-end-Developer-Interview-Questions 5177 +sdelements/lets-chat 5045 +iojs/io.js 4866 +bendc/frontend-guidelines 4696 +twbs/bootstrap 4475 +getify/You-Dont-Know-JS 4344 +0xAX/linux-insides 4220 +IanLunn/Hover 4102 +dotnet/coreclr 4049 +daniel-lundin/snabbt.js 3996 +angular/angular.js 3942 +Mango/slideout 3866 +twostairs/paperwork 3684 +Dogfalo/materialize 3664 +libreboard/libreboard 3469 +mbostock/d3 3422 +Selz/plyr 3403 +USArmyResearchLab/Dshell 3401 +LeaVerou/awesomplete 3390 +MengTo/Spring 3276 +muut/riotjs 3244 +wasabeef/awesome-android-ui 3225 +airbnb/javascript 3155 +sophron/wifiphisher 3142 +KeyboardFire/mkcast 3134 +google/fonts 3122 +mikechau/react-primer-draft 3044 +jhauswald/sirius 3030 +sbstjn/timesheet.js 3005 +github/gitignore 2926 +torvalds/linux 2907 +IonicaBizau/git-stats 2896 +googlesamples/android-UniversalMusicPlayer 2838 +nwjs/nw.js 2829 +primer/primer 2816 +lukehoban/es6features 2810 +FortAwesome/Font-Awesome 2798 +driftyco/ionic 2783 + diff --git a/regression-test/data/variant_p2/sql/howHasTheListOfTopRepositoriesChangedOverTheYears2.out b/regression-test/data/variant_p2/sql/howHasTheListOfTopRepositoriesChangedOverTheYears2.out new file mode 100644 index 0000000000..1f82eb03fa --- /dev/null +++ b/regression-test/data/variant_p2/sql/howHasTheListOfTopRepositoriesChangedOverTheYears2.out @@ -0,0 +1,53 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !howHasTheListOfTopRepositoriesChangedOverTheYears2 -- +facebook/react-native 10021 +phanan/htaccess 7964 +alex/what-happens-when 7407 +facebook/react 6762 +moklick/frontend-stuff 6528 +prakhar1989/awesome-courses 6110 +yaronn/blessed-contrib 5880 +arasatasaygin/is.js 5498 +vhf/free-programming-books 5437 +tiimgreen/github-cheat-sheet 5420 +gorhill/uBlock 5415 +Flipboard/react-canvas 5187 +h5bp/Front-end-Developer-Interview-Questions 5177 +sdelements/lets-chat 5045 +iojs/io.js 4866 +bendc/frontend-guidelines 4696 +twbs/bootstrap 4475 +getify/You-Dont-Know-JS 4344 +0xAX/linux-insides 4220 +IanLunn/Hover 4102 +dotnet/coreclr 4049 +daniel-lundin/snabbt.js 3996 +angular/angular.js 3942 +Mango/slideout 3866 +twostairs/paperwork 3684 +Dogfalo/materialize 3664 +libreboard/libreboard 3469 +mbostock/d3 3422 +Selz/plyr 3403 +USArmyResearchLab/Dshell 3401 +LeaVerou/awesomplete 3390 +MengTo/Spring 3276 +muut/riotjs 3244 +wasabeef/awesome-android-ui 3225 +airbnb/javascript 3155 +sophron/wifiphisher 3142 +KeyboardFire/mkcast 3134 +google/fonts 3122 +mikechau/react-primer-draft 3044 +jhauswald/sirius 3030 +sbstjn/timesheet.js 3005 +github/gitignore 2926 +torvalds/linux 2907 +IonicaBizau/git-stats 2896 +googlesamples/android-UniversalMusicPlayer 2838 +nwjs/nw.js 2829 +primer/primer 2816 +lukehoban/es6features 2810 +FortAwesome/Font-Awesome 2798 +driftyco/ionic 2783 + diff --git a/regression-test/data/variant_p2/sql/howHasTheListOfTopRepositoriesChangedOverTheYears3.out b/regression-test/data/variant_p2/sql/howHasTheListOfTopRepositoriesChangedOverTheYears3.out new file mode 100644 index 0000000000..8b3156f98f --- /dev/null +++ b/regression-test/data/variant_p2/sql/howHasTheListOfTopRepositoriesChangedOverTheYears3.out @@ -0,0 +1,53 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !howHasTheListOfTopRepositoriesChangedOverTheYears3 -- +facebook/react-native 10021 +phanan/htaccess 7964 +alex/what-happens-when 7407 +facebook/react 6762 +moklick/frontend-stuff 6528 +prakhar1989/awesome-courses 6110 +yaronn/blessed-contrib 5880 +arasatasaygin/is.js 5498 +vhf/free-programming-books 5437 +tiimgreen/github-cheat-sheet 5420 +gorhill/uBlock 5415 +Flipboard/react-canvas 5187 +h5bp/Front-end-Developer-Interview-Questions 5177 +sdelements/lets-chat 5045 +iojs/io.js 4866 +bendc/frontend-guidelines 4696 +twbs/bootstrap 4475 +getify/You-Dont-Know-JS 4344 +0xAX/linux-insides 4220 +IanLunn/Hover 4102 +dotnet/coreclr 4049 +daniel-lundin/snabbt.js 3996 +angular/angular.js 3942 +Mango/slideout 3866 +twostairs/paperwork 3684 +Dogfalo/materialize 3664 +libreboard/libreboard 3469 +mbostock/d3 3422 +Selz/plyr 3403 +USArmyResearchLab/Dshell 3401 +LeaVerou/awesomplete 3390 +MengTo/Spring 3276 +muut/riotjs 3244 +wasabeef/awesome-android-ui 3225 +airbnb/javascript 3155 +sophron/wifiphisher 3142 +KeyboardFire/mkcast 3134 +google/fonts 3122 +mikechau/react-primer-draft 3044 +jhauswald/sirius 3030 +sbstjn/timesheet.js 3005 +github/gitignore 2926 +torvalds/linux 2907 +IonicaBizau/git-stats 2896 +googlesamples/android-UniversalMusicPlayer 2838 +nwjs/nw.js 2829 +primer/primer 2816 +lukehoban/es6features 2810 +FortAwesome/Font-Awesome 2798 +driftyco/ionic 2783 + diff --git a/regression-test/data/variant_p2/sql/howHasTheListOfTopRepositoriesChangedOverTheYears4.out b/regression-test/data/variant_p2/sql/howHasTheListOfTopRepositoriesChangedOverTheYears4.out new file mode 100644 index 0000000000..496e681f6d --- /dev/null +++ b/regression-test/data/variant_p2/sql/howHasTheListOfTopRepositoriesChangedOverTheYears4.out @@ -0,0 +1,53 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !howHasTheListOfTopRepositoriesChangedOverTheYears4 -- +facebook/react-native 10021 +phanan/htaccess 7964 +alex/what-happens-when 7407 +facebook/react 6762 +moklick/frontend-stuff 6528 +prakhar1989/awesome-courses 6110 +yaronn/blessed-contrib 5880 +arasatasaygin/is.js 5498 +vhf/free-programming-books 5437 +tiimgreen/github-cheat-sheet 5420 +gorhill/uBlock 5415 +Flipboard/react-canvas 5187 +h5bp/Front-end-Developer-Interview-Questions 5177 +sdelements/lets-chat 5045 +iojs/io.js 4866 +bendc/frontend-guidelines 4696 +twbs/bootstrap 4475 +getify/You-Dont-Know-JS 4344 +0xAX/linux-insides 4220 +IanLunn/Hover 4102 +dotnet/coreclr 4049 +daniel-lundin/snabbt.js 3996 +angular/angular.js 3942 +Mango/slideout 3866 +twostairs/paperwork 3684 +Dogfalo/materialize 3664 +libreboard/libreboard 3469 +mbostock/d3 3422 +Selz/plyr 3403 +USArmyResearchLab/Dshell 3401 +LeaVerou/awesomplete 3390 +MengTo/Spring 3276 +muut/riotjs 3244 +wasabeef/awesome-android-ui 3225 +airbnb/javascript 3155 +sophron/wifiphisher 3142 +KeyboardFire/mkcast 3134 +google/fonts 3122 +mikechau/react-primer-draft 3044 +jhauswald/sirius 3030 +sbstjn/timesheet.js 3005 +github/gitignore 2926 +torvalds/linux 2907 +IonicaBizau/git-stats 2896 +googlesamples/android-UniversalMusicPlayer 2838 +nwjs/nw.js 2829 +primer/primer 2816 +lukehoban/es6features 2810 +FortAwesome/Font-Awesome 2798 +driftyco/ionic 2783 + diff --git a/regression-test/data/variant_p2/sql/howHasTheListOfTopRepositoriesChangedOverTheYears5.out b/regression-test/data/variant_p2/sql/howHasTheListOfTopRepositoriesChangedOverTheYears5.out new file mode 100644 index 0000000000..13116d3cc7 --- /dev/null +++ b/regression-test/data/variant_p2/sql/howHasTheListOfTopRepositoriesChangedOverTheYears5.out @@ -0,0 +1,53 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !howHasTheListOfTopRepositoriesChangedOverTheYears5 -- +facebook/react-native 10021 +phanan/htaccess 7964 +alex/what-happens-when 7407 +facebook/react 6762 +moklick/frontend-stuff 6528 +prakhar1989/awesome-courses 6110 +yaronn/blessed-contrib 5880 +arasatasaygin/is.js 5498 +vhf/free-programming-books 5437 +tiimgreen/github-cheat-sheet 5420 +gorhill/uBlock 5415 +Flipboard/react-canvas 5187 +h5bp/Front-end-Developer-Interview-Questions 5177 +sdelements/lets-chat 5045 +iojs/io.js 4866 +bendc/frontend-guidelines 4696 +twbs/bootstrap 4475 +getify/You-Dont-Know-JS 4344 +0xAX/linux-insides 4220 +IanLunn/Hover 4102 +dotnet/coreclr 4049 +daniel-lundin/snabbt.js 3996 +angular/angular.js 3942 +Mango/slideout 3866 +twostairs/paperwork 3684 +Dogfalo/materialize 3664 +libreboard/libreboard 3469 +mbostock/d3 3422 +Selz/plyr 3403 +USArmyResearchLab/Dshell 3401 +LeaVerou/awesomplete 3390 +MengTo/Spring 3276 +muut/riotjs 3244 +wasabeef/awesome-android-ui 3225 +airbnb/javascript 3155 +sophron/wifiphisher 3142 +KeyboardFire/mkcast 3134 +google/fonts 3122 +mikechau/react-primer-draft 3044 +jhauswald/sirius 3030 +sbstjn/timesheet.js 3005 +github/gitignore 2926 +torvalds/linux 2907 +IonicaBizau/git-stats 2896 +googlesamples/android-UniversalMusicPlayer 2838 +nwjs/nw.js 2829 +primer/primer 2816 +lukehoban/es6features 2810 +FortAwesome/Font-Awesome 2798 +driftyco/ionic 2783 + diff --git a/regression-test/data/variant_p2/sql/howHasTheListOfTopRepositoriesChangedOverTheYears6.out b/regression-test/data/variant_p2/sql/howHasTheListOfTopRepositoriesChangedOverTheYears6.out new file mode 100644 index 0000000000..16eb965b64 --- /dev/null +++ b/regression-test/data/variant_p2/sql/howHasTheListOfTopRepositoriesChangedOverTheYears6.out @@ -0,0 +1,53 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !howHasTheListOfTopRepositoriesChangedOverTheYears6 -- +facebook/react-native 10021 +phanan/htaccess 7964 +alex/what-happens-when 7407 +facebook/react 6762 +moklick/frontend-stuff 6528 +prakhar1989/awesome-courses 6110 +yaronn/blessed-contrib 5880 +arasatasaygin/is.js 5498 +vhf/free-programming-books 5437 +tiimgreen/github-cheat-sheet 5420 +gorhill/uBlock 5415 +Flipboard/react-canvas 5187 +h5bp/Front-end-Developer-Interview-Questions 5177 +sdelements/lets-chat 5045 +iojs/io.js 4866 +bendc/frontend-guidelines 4696 +twbs/bootstrap 4475 +getify/You-Dont-Know-JS 4344 +0xAX/linux-insides 4220 +IanLunn/Hover 4102 +dotnet/coreclr 4049 +daniel-lundin/snabbt.js 3996 +angular/angular.js 3942 +Mango/slideout 3866 +twostairs/paperwork 3684 +Dogfalo/materialize 3664 +libreboard/libreboard 3469 +mbostock/d3 3422 +Selz/plyr 3403 +USArmyResearchLab/Dshell 3401 +LeaVerou/awesomplete 3390 +MengTo/Spring 3276 +muut/riotjs 3244 +wasabeef/awesome-android-ui 3225 +airbnb/javascript 3155 +sophron/wifiphisher 3142 +KeyboardFire/mkcast 3134 +google/fonts 3122 +mikechau/react-primer-draft 3044 +jhauswald/sirius 3030 +sbstjn/timesheet.js 3005 +github/gitignore 2926 +torvalds/linux 2907 +IonicaBizau/git-stats 2896 +googlesamples/android-UniversalMusicPlayer 2838 +nwjs/nw.js 2829 +primer/primer 2816 +lukehoban/es6features 2810 +FortAwesome/Font-Awesome 2798 +driftyco/ionic 2783 + diff --git a/regression-test/data/variant_p2/sql/howHasTheListOfTopRepositoriesChangedOverTheYears7.out b/regression-test/data/variant_p2/sql/howHasTheListOfTopRepositoriesChangedOverTheYears7.out new file mode 100644 index 0000000000..998b22fa77 --- /dev/null +++ b/regression-test/data/variant_p2/sql/howHasTheListOfTopRepositoriesChangedOverTheYears7.out @@ -0,0 +1,13 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !howHasTheListOfTopRepositoriesChangedOverTheYears7 -- +facebook/react-native 2015 10021 +phanan/htaccess 2015 7964 +alex/what-happens-when 2015 7407 +facebook/react 2015 6762 +moklick/frontend-stuff 2015 6528 +prakhar1989/awesome-courses 2015 6110 +yaronn/blessed-contrib 2015 5880 +arasatasaygin/is.js 2015 5498 +vhf/free-programming-books 2015 5437 +tiimgreen/github-cheat-sheet 2015 5420 + diff --git a/regression-test/data/variant_p2/sql/howHasTheTotalNumberOfStarsChangedOverTime.out b/regression-test/data/variant_p2/sql/howHasTheTotalNumberOfStarsChangedOverTime.out new file mode 100644 index 0000000000..e089d1fb78 --- /dev/null +++ b/regression-test/data/variant_p2/sql/howHasTheTotalNumberOfStarsChangedOverTime.out @@ -0,0 +1,4 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !howHasTheTotalNumberOfStarsChangedOverTime -- +2015 3951085 + diff --git a/regression-test/data/variant_p2/sql/issuesWithTheMostComments1.out b/regression-test/data/variant_p2/sql/issuesWithTheMostComments1.out new file mode 100644 index 0000000000..65a9891bf1 --- /dev/null +++ b/regression-test/data/variant_p2/sql/issuesWithTheMostComments1.out @@ -0,0 +1,4 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !issuesWithTheMostComments1 -- +4237859 + diff --git a/regression-test/data/variant_p2/sql/issuesWithTheMostComments2.out b/regression-test/data/variant_p2/sql/issuesWithTheMostComments2.out new file mode 100644 index 0000000000..fe95c99e16 --- /dev/null +++ b/regression-test/data/variant_p2/sql/issuesWithTheMostComments2.out @@ -0,0 +1,53 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !issuesWithTheMostComments2 -- +chrsmith/bwapi 28873 +owncloud/core 24124 +apache/spark 21708 +chrsmith/google-api-java-client 20317 +rust-lang/rust 19251 +GoogleCloudPlatform/kubernetes 16616 +docker/docker 16562 +cms-sw/cmssw 15740 +chrsmith/reaver-wps 14305 +tgstation/-tg-station 13022 +chrsmith/html5rocks 10539 +chrsmith/open-ig 10130 +iojs/io.js 8927 +JuliaLang/julia 8918 +saltstack/salt 8792 +Homebrew/homebrew 8192 +spyder-ide/spyder 7874 +SiCKRAGETV/sickrage-issues 7635 +pychess/pychess 7618 +joomla/joomla-cms 7367 +xbmc/xbmc 7320 +openshift/origin 7242 +ManageIQ/manageiq 6862 +mozilla-b2g/gaia 6791 +ConEmu/old-issues 6731 +edx/edx-platform 6595 +dotnet/roslyn 6348 +chrsmith/hedgewars 6134 +rails/rails 5919 +atom/atom 5885 +brianchandotcom/liferay-portal 5851 +NixOS/nixpkgs 5475 +yiisoft/yii2 5173 +FortAwesome/Font-Awesome 4863 +neovim/neovim 4770 +scikit-learn/scikit-learn 4542 +angular/angular.js 4524 +CleverRaven/Cataclysm-DDA 4372 +symfony/symfony 4354 +CartoDB/cartodb 4352 +ember-cli/ember-cli 4299 +ceph/ceph 4263 +npm/npm 4252 +sphinx-doc/testing2 4220 +laravel/framework 4217 +RIOT-OS/RIOT 4091 +gorhill/uBlock 4088 +rust-lang/rfcs 4051 +OpenRA/OpenRA 3993 +Microsoft/TypeScript 3963 + diff --git a/regression-test/data/variant_p2/sql/issuesWithTheMostComments3.out b/regression-test/data/variant_p2/sql/issuesWithTheMostComments3.out new file mode 100644 index 0000000000..a9b5b17ca0 --- /dev/null +++ b/regression-test/data/variant_p2/sql/issuesWithTheMostComments3.out @@ -0,0 +1,53 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !issuesWithTheMostComments3 -- +chrsmith/bwapi 28873 499 58 +owncloud/core 24124 3172 8 +apache/spark 21708 1693 13 +chrsmith/google-api-java-client 20317 782 26 +rust-lang/rust 19251 4525 4 +GoogleCloudPlatform/kubernetes 16616 3057 5 +docker/docker 16562 2933 6 +cms-sw/cmssw 15740 1678 9 +chrsmith/reaver-wps 14305 544 26 +tgstation/-tg-station 13022 1786 7 +chrsmith/html5rocks 10539 884 12 +chrsmith/open-ig 10130 898 11 +iojs/io.js 8927 1124 8 +JuliaLang/julia 8918 1383 6 +saltstack/salt 8792 2386 4 +Homebrew/homebrew 8192 2315 4 +spyder-ide/spyder 7874 1813 4 +SiCKRAGETV/sickrage-issues 7635 766 10 +pychess/pychess 7618 930 8 +joomla/joomla-cms 7367 1276 6 +xbmc/xbmc 7320 836 9 +openshift/origin 7242 908 8 +ManageIQ/manageiq 6862 1180 6 +mozilla-b2g/gaia 6791 2118 3 +ConEmu/old-issues 6731 1120 6 +edx/edx-platform 6595 1086 6 +dotnet/roslyn 6348 1304 5 +chrsmith/hedgewars 6134 862 7 +rails/rails 5919 1580 4 +atom/atom 5885 1469 4 +brianchandotcom/liferay-portal 5851 2221 3 +NixOS/nixpkgs 5475 1335 4 +yiisoft/yii2 5173 1302 4 +FortAwesome/Font-Awesome 4863 1457 3 +neovim/neovim 4770 628 8 +scikit-learn/scikit-learn 4542 636 7 +angular/angular.js 4524 1223 4 +CleverRaven/Cataclysm-DDA 4372 865 5 +symfony/symfony 4354 1188 4 +CartoDB/cartodb 4352 1026 4 +ember-cli/ember-cli 4299 877 5 +ceph/ceph 4263 978 4 +npm/npm 4252 1320 3 +sphinx-doc/testing2 4220 1416 3 +laravel/framework 4217 1138 4 +RIOT-OS/RIOT 4091 631 6 +gorhill/uBlock 4088 671 6 +rust-lang/rfcs 4051 384 11 +OpenRA/OpenRA 3993 691 6 +Microsoft/TypeScript 3963 974 4 + diff --git a/regression-test/data/variant_p2/sql/issuesWithTheMostComments4.out b/regression-test/data/variant_p2/sql/issuesWithTheMostComments4.out new file mode 100644 index 0000000000..c4406b2309 --- /dev/null +++ b/regression-test/data/variant_p2/sql/issuesWithTheMostComments4.out @@ -0,0 +1,53 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !issuesWithTheMostComments4 -- +tphongio/elasticbox-plugin 1 3216 +TTMTT/iCL0udin 1 1844 +iiordanov/remote-desktop-clients 39 1473 +chrsmith/bwapi 134 1200 +codecov/ci-repo 1 771 +chrsmith/bwapi 145 528 +xbmc/xbmc 6227 515 +chrsmith/bwapi 322 480 +TrigonaMinima/TrigonaMinima.github.io 3 450 +chrsmith/bwapi 190 432 +Wouter1/EMU-driver 40 385 +chrsmith/reaver-wps 195 368 +mtambara/liferay-portal 22 367 +chrsmith/bwapi 104 352 +chrsmith/google-api-java-client 18 351 +Flyer53/jsPanel 23 338 +chrsmith/bwapi 93 336 +chrsmith/bwapi 113 336 +synergy/synergy 4349 322 +chrsmith/bwapi 142 320 +chrsmith/reaver-wps 16 316 +chrsmith/reaver-wps 158 316 +neovim/neovim 1820 316 +openmicroscopy/snoopys-sandbox 14 310 +owncloud/core 14472 295 +mtambara/liferay-portal 21 286 +chrsmith/reaver-wps 6 284 +mtambara/liferay-portal 25 282 +chrsmith/reaver-wps 129 280 +Tribler/tribler 1210 277 +OpenELEC/OpenELEC.tv 3726 275 +chrsmith/google-api-java-client 2 273 +chrsmith/reaver-wps 203 260 +letsgetrandy/DICSS 16 259 +neovim/neovim 2076 257 +chrsmith/bwapi 96 256 +chrsmith/bwapi 198 256 +doraTeX/TeX2img 14 255 +chrsmith/bwapi 19 255 +iojs/website 125 254 +tgstation/-tg-station 7420 254 +chrsmith/bwapi 232 252 +chrsmith/bwapi 281 247 +chrsmith/bwapi 120 240 +chrsmith/bwapi 169 240 +chrsmith/bwapi 373 240 +chrsmith/reaver-wps 90 232 +luc-github/Repetier-Firmware-0.92 10 227 +aosp-exchange-group/about 1 226 +chrsmith/bwapi 153 224 + diff --git a/regression-test/data/variant_p2/sql/issuesWithTheMostComments5.out b/regression-test/data/variant_p2/sql/issuesWithTheMostComments5.out new file mode 100644 index 0000000000..9b00c81679 --- /dev/null +++ b/regression-test/data/variant_p2/sql/issuesWithTheMostComments5.out @@ -0,0 +1,53 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !issuesWithTheMostComments5 -- +iiordanov/remote-desktop-clients 39 1473 +chrsmith/bwapi 134 1200 +chrsmith/bwapi 145 528 +xbmc/xbmc 6227 515 +chrsmith/bwapi 322 480 +chrsmith/bwapi 190 432 +Wouter1/EMU-driver 40 385 +chrsmith/reaver-wps 195 368 +mtambara/liferay-portal 22 367 +chrsmith/bwapi 104 352 +chrsmith/google-api-java-client 18 351 +Flyer53/jsPanel 23 338 +chrsmith/bwapi 93 336 +chrsmith/bwapi 113 336 +synergy/synergy 4349 322 +chrsmith/bwapi 142 320 +chrsmith/reaver-wps 16 316 +chrsmith/reaver-wps 158 316 +neovim/neovim 1820 316 +openmicroscopy/snoopys-sandbox 14 310 +owncloud/core 14472 295 +mtambara/liferay-portal 21 286 +mtambara/liferay-portal 25 282 +chrsmith/reaver-wps 129 280 +Tribler/tribler 1210 277 +OpenELEC/OpenELEC.tv 3726 275 +chrsmith/reaver-wps 203 260 +letsgetrandy/DICSS 16 259 +neovim/neovim 2076 257 +chrsmith/bwapi 96 256 +chrsmith/bwapi 198 256 +chrsmith/bwapi 19 255 +doraTeX/TeX2img 14 255 +iojs/website 125 254 +tgstation/-tg-station 7420 254 +chrsmith/bwapi 232 252 +chrsmith/bwapi 281 247 +chrsmith/bwapi 120 240 +chrsmith/bwapi 169 240 +chrsmith/bwapi 373 240 +chrsmith/reaver-wps 90 232 +chrsmith/bwapi 153 224 +lhorie/mithril.js 413 223 +ValveSoftware/steam-for-linux 3671 222 +iojs/io.js 978 221 +xbmc/xbmc 5329 221 +chrsmith/google-api-java-client 361 216 +dotnet/roslyn 98 215 +chrsmith/bwapi 135 208 +limelight-stream/limelight-ios 20 207 + diff --git a/regression-test/data/variant_p2/sql/issuesWithTheMostComments6.out b/regression-test/data/variant_p2/sql/issuesWithTheMostComments6.out new file mode 100644 index 0000000000..c354bb906d --- /dev/null +++ b/regression-test/data/variant_p2/sql/issuesWithTheMostComments6.out @@ -0,0 +1,53 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !issuesWithTheMostComments6 -- +iiordanov/remote-desktop-clients 39 1473 1023 +xbmc/xbmc 6227 515 17 +Flyer53/jsPanel 23 338 5 +synergy/synergy 4349 322 108 +neovim/neovim 1820 316 21 +owncloud/core 14472 295 8 +OpenELEC/OpenELEC.tv 3726 275 18 +letsgetrandy/DICSS 16 259 138 +neovim/neovim 2076 257 30 +iojs/website 125 254 160 +tgstation/-tg-station 7420 254 18 +lhorie/mithril.js 413 223 30 +ValveSoftware/steam-for-linux 3671 222 119 +iojs/io.js 978 221 76 +xbmc/xbmc 5329 221 6 +dotnet/roslyn 98 215 99 +limelight-stream/limelight-ios 20 207 154 +6to5/6to5 596 204 51 +raspberrypi/firmware 377 193 5 +isaacs/github 18 183 179 +SiCKRAGETV/sickrage-issues 589 180 5 +SiCKRAGETV/sickrage-issues 1023 179 4 +MarlinFirmware/Marlin 1209 179 25 +owncloud/core 14151 173 8 +rust-lang/rfcs 803 170 37 +rackerlabs/repose 1149 170 4 +Shani-08/ShaniXBMCWork 74 167 15 +easydigitaldownloads/Easy-Digital-Downloads 2548 164 9 +JorgenPhi/php-snapchat 89 158 19 +wf9a5m75/phonegap-googlemaps-plugin 408 157 5 +wrye-bash/wrye-bash 187 155 6 +catalinii/minisatip 21 148 5 +whatwg/fetch 27 146 15 +docker/docker 4036 145 46 +angular/angular.dart 1650 144 6 +rust-lang/rfcs 560 142 28 +SynoCommunity/spksrc 1478 140 24 +angular/angular.dart 1647 140 6 +ajaxorg/cloud9 3200 140 41 +docker/docker 9882 139 26 +iojs/io.js 758 136 10 +joomla/joomla-cms 5140 135 12 +aerogear/aerogear-unifiedpush-server 502 134 6 +UV-CDAT/uvcdat 1113 134 5 +apache/spark 3916 134 8 +LaurentGomila/SFML 757 132 8 +xbmc/xbmc 5561 132 11 +trakt/script.trakt 190 130 28 +owncloud/core 11884 129 8 +owncloud/core 12801 129 12 + diff --git a/regression-test/data/variant_p2/sql/issuesWithTheMostComments7.out b/regression-test/data/variant_p2/sql/issuesWithTheMostComments7.out new file mode 100644 index 0000000000..747ff0ff0f --- /dev/null +++ b/regression-test/data/variant_p2/sql/issuesWithTheMostComments7.out @@ -0,0 +1,53 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !issuesWithTheMostComments7 -- +miabot/galleries.csv 21872 1 +NREL/EnergyPlus 3874 14 +servo/servo 3649 14 +jirikuncar/invenio 3292 3 +rust-lang/rust 1513 34 +coala-analyzer/coala 1445 7 +NREL/OpenStudio 1327 11 +mono/MonoGame 1224 14 +Wikia/app 852 24 +Microsoft/TypeScript 760 23 +JuliaLang/julia 732 46 +netguru/people 732 28 +TrinityCore/TrinityCore 630 126 +NixOS/nixpkgs 524 78 +FreeBSDFoundation/freebsd 516 6 +magnumripper/JohnTheRipper 501 8 +netguru-training/foodempire 474 1 +jruby/jruby 429 18 +junmin-zhu/blink-crosswalk 411 3 +AnnaPonomareva/ybackend 381 3 +OsmSharp/OsmSharp 354 2 +kasper93/mpc-hc 336 5 +ChaiScript/ChaiScript 332 1 +odoo/odoo 329 59 +pocket-playlab/optimus-prime 328 2 +OpenSprites/OpenSprites 325 14 +laravel/framework 325 136 +akkadotnet/akka.net 305 6 +edx/edx-platform 304 40 +netguru/carrierwave-ios 303 7 +nick-levelup/home 294 4 +cbitstech/think_feel_do_engine 292 7 +online-labs/kernel-config 292 3 +rails/rails 292 104 +Warsow/qfusion 284 5 +kenji0919/pirodx_manager 283 2 +php/php-src 282 44 +gbitten/preempt_rt 278 1 +HaxeFoundation/haxe 255 19 +basho/riak_cs 255 6 +PrestaShop/PrestaShop 250 60 +hwoarangmy/OpenDungeons 249 5 +nl5887/github-webhook-test 240 2 +nl5887/linthub-demo 237 1 +L2J/L2J_Server 234 17 +netguru/devise-ios 231 7 +lmccallum/GeoCanViz 229 2 +mtk09422/chromiumos-third_party-coreboot 228 2 +cachethq/Cachet 222 9 +gorhill/uBlock 218 20 + diff --git a/regression-test/data/variant_p2/sql/issuesWithTheMostComments8.out b/regression-test/data/variant_p2/sql/issuesWithTheMostComments8.out new file mode 100644 index 0000000000..fb3a881a0c --- /dev/null +++ b/regression-test/data/variant_p2/sql/issuesWithTheMostComments8.out @@ -0,0 +1,3 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !issuesWithTheMostComments8 -- + diff --git a/regression-test/data/variant_p2/sql/mostForkedRepositories.out b/regression-test/data/variant_p2/sql/mostForkedRepositories.out new file mode 100644 index 0000000000..be96dadb40 --- /dev/null +++ b/regression-test/data/variant_p2/sql/mostForkedRepositories.out @@ -0,0 +1,53 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !mostForkedRepositories -- +jtleek/datasharing 25220 +rdpeng/ProgrammingAssignment2 14984 +octocat/Spoon-Knife 12375 +MyCoolTest/a-terrible-project 9266 +rdpeng/ExData_Plotting1 6362 +TridentSDK/Trident 5571 +rdpeng/RepData_PeerAssessment1 3557 +udacity/frontend-nanodegree-resume 3256 +twbs/bootstrap 3104 +barryclark/jekyll-now 2792 +angular/angular.js 2630 +LarryMad/recipes 2072 +HubPress/hubpress.io 1801 +rdpeng/courses 1723 +vhf/free-programming-books 1543 +jlord/patchwork 1467 +github/gitignore 1330 +mbostock/d3 1299 +bcaffo/courses 1252 +deadlyvipers/dojo_rules 1222 +torvalds/linux 1221 +Homebrew/homebrew 1207 +aporter/coursera-android 1089 +docker/docker 1064 +goagent/goagent 1032 +Trinea/android-open-project 1007 +rails/rails 960 +laravel/laravel 954 +facebook/react 949 +tastejs/todomvc 909 +nightscout/cgm-remote-monitor 907 +udacity/create-your-own-adventure 894 +apache/spark 889 +gabrielecirulli/2048 844 +yiisoft/yii2 838 +facebook/react-native 822 +Itseez/opencv 813 +prakhar1989/awesome-courses 813 +FortAwesome/Font-Awesome 774 +dotnet/coreclr 760 +ColonyTestOrganisation/repoName 753 +swirldev/swirl_courses 750 +DataScienceSpecialization/courses 746 +USArmyResearchLab/Dshell 736 +django/django 714 +h5bp/Front-end-Developer-Interview-Questions 714 +iluwatar/java-design-patterns 714 +jquery/jquery 712 +daneden/animate.css 707 +atom/atom 705 + diff --git a/regression-test/data/variant_p2/sql/mostPopularCommentsOnGithub.out 
b/regression-test/data/variant_p2/sql/mostPopularCommentsOnGithub.out new file mode 100644 index 0000000000..78eb92582c --- /dev/null +++ b/regression-test/data/variant_p2/sql/mostPopularCommentsOnGithub.out @@ -0,0 +1,4 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !mostPopularCommentsOnGithub -- +:+1: 38565 + diff --git a/regression-test/data/variant_p2/sql/organizationsByTheNumberOfRepositories.out b/regression-test/data/variant_p2/sql/organizationsByTheNumberOfRepositories.out new file mode 100644 index 0000000000..41e4d206ec --- /dev/null +++ b/regression-test/data/variant_p2/sql/organizationsByTheNumberOfRepositories.out @@ -0,0 +1,53 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !organizationsByTheNumberOfRepositories -- +google 168 +apache 116 +facebook 100 +sindresorhus 94 +substack 89 +codrops 66 +twitter 59 +mozilla 58 +spring-projects 55 +stackforge 54 +netflix 50 +openstack 48 +tj 47 +yahoo 47 +square 46 +thoughtbot 44 +mapbox 43 +googlesamples 41 +angular 36 +github 36 +maxogden 35 +microsoft 34 +tpope 34 +googlecloudplatform 33 +mafintosh 33 +nicklockwood 33 +alibaba 31 +heroku 30 +aspnet 29 +adafruit 28 +automattic 28 +elasticsearch 28 +firebase 28 +thephpleague 28 +hadley 27 +spotify 27 +addyosmani 26 +atom 26 +driftyco 26 +mattn 26 +esri 25 +gruntjs 25 +iojs 25 +shopify 25 +filamentgroup 24 +golang 24 +rails 24 +yeoman 24 +angular-ui 23 +awslabs 23 + diff --git a/regression-test/data/variant_p2/sql/organizationsByTheNumberOfStars.out b/regression-test/data/variant_p2/sql/organizationsByTheNumberOfStars.out new file mode 100644 index 0000000000..cf8094bb77 --- /dev/null +++ b/regression-test/data/variant_p2/sql/organizationsByTheNumberOfStars.out @@ -0,0 +1,53 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !organizationsByTheNumberOfStars -- +facebook 46498 +google 33024 +angular 10705 +github 9153 +square 8896 +dotnet 8735 +thoughtbot 8290 +h5bp 8175 +yalantis 8129 +phanan 7996 +sindresorhus 7660 +alex 7615 +microsoft 7376 +mozilla 7136 +moklick 6935 +prakhar1989 6772 +apache 6762 +docker 6498 +yaronn 6467 +flipboard 6457 +twbs 6024 +iojs 5924 +yahoo 5793 +gorhill 5791 +twitter 5740 +vhf 5526 +arasatasaygin 5498 +substack 5464 +tiimgreen 5425 +airbnb 5182 +atom 5169 +mengto 5150 +sdelements 5096 +bendc 5082 +jakewharton 5020 +driftyco 5005 +wasabeef 4750 +0xax 4661 +getify 4554 +googlesamples 4504 +ianlunn 4348 +grpc 4339 +angular-ui 4188 +shadowsocks 4177 +daniel-lundin 4001 +leaverou 3999 +etsy 3968 +ecomfe 3911 +mango 3888 +mbostock 3839 + diff --git a/regression-test/data/variant_p2/sql/organizationsByTheSizeOfCommunity.out b/regression-test/data/variant_p2/sql/organizationsByTheSizeOfCommunity.out new file mode 100644 index 0000000000..667b28838b --- /dev/null +++ b/regression-test/data/variant_p2/sql/organizationsByTheSizeOfCommunity.out @@ -0,0 +1,8 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !organizationsByTheSizeOfCommunity -- +facebook 3225 797 1516 2522 195 172 +fortawesome 2994 42 624 2645 3 3 +angular 2831 463 1229 2262 109 44 +docker 2697 504 948 2419 210 34 +atom 2620 276 1368 2024 65 20 + diff --git a/regression-test/data/variant_p2/sql/proportionsBetweenStarsAndForks1.out b/regression-test/data/variant_p2/sql/proportionsBetweenStarsAndForks1.out new file mode 100644 index 0000000000..026d4c8e79 --- /dev/null +++ b/regression-test/data/variant_p2/sql/proportionsBetweenStarsAndForks1.out @@ -0,0 +1,53 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !proportionsBetweenStarsAndForks1 -- +jtleek/datasharing 25220 555 0 +rdpeng/ProgrammingAssignment2 14984 91 0 +octocat/Spoon-Knife 12375 165 0 +MyCoolTest/a-terrible-project 9266 0 0 +rdpeng/ExData_Plotting1 6362 12 0 +TridentSDK/Trident 5571 19 0 +rdpeng/RepData_PeerAssessment1 3557 6 0 +udacity/frontend-nanodegree-resume 3256 49 0 +twbs/bootstrap 3104 4475 1 +barryclark/jekyll-now 2792 471 0 +angular/angular.js 2630 3942 1 +LarryMad/recipes 2072 13 0 +HubPress/hubpress.io 1801 1843 1 +rdpeng/courses 1723 281 0 +vhf/free-programming-books 1543 5437 4 +jlord/patchwork 1467 36 0 +github/gitignore 1330 2926 2 +mbostock/d3 1299 3422 3 +bcaffo/courses 1252 167 0 +deadlyvipers/dojo_rules 1222 14 0 +torvalds/linux 1221 2907 2 +Homebrew/homebrew 1207 1583 1 +aporter/coursera-android 1089 602 1 +docker/docker 1064 2655 2 +goagent/goagent 1032 1912 2 +Trinea/android-open-project 1007 1901 2 +rails/rails 960 1363 1 +laravel/laravel 954 2056 2 +facebook/react 949 6762 7 +tastejs/todomvc 909 1035 1 +nightscout/cgm-remote-monitor 907 25 0 +udacity/create-your-own-adventure 894 18 0 +apache/spark 889 1028 1 +gabrielecirulli/2048 844 485 1 +yiisoft/yii2 838 1034 1 +facebook/react-native 822 10021 12 +Itseez/opencv 813 847 1 +prakhar1989/awesome-courses 813 6110 8 +FortAwesome/Font-Awesome 774 2798 4 +dotnet/coreclr 760 4049 5 +ColonyTestOrganisation/repoName 753 0 0 +swirldev/swirl_courses 750 252 0 +DataScienceSpecialization/courses 746 296 0 +USArmyResearchLab/Dshell 736 3401 5 +django/django 714 1369 2 +h5bp/Front-end-Developer-Interview-Questions 714 5177 7 +iluwatar/java-design-patterns 714 2771 4 +jquery/jquery 712 1368 2 +daneden/animate.css 707 2687 4 +atom/atom 705 2745 4 + diff --git a/regression-test/data/variant_p2/sql/proportionsBetweenStarsAndForks2.out b/regression-test/data/variant_p2/sql/proportionsBetweenStarsAndForks2.out new file mode 100644 index 0000000000..8997ec079f --- /dev/null +++ 
b/regression-test/data/variant_p2/sql/proportionsBetweenStarsAndForks2.out @@ -0,0 +1,53 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !proportionsBetweenStarsAndForks2 -- +motemen/gore 11 894 81 +nathanpeck/clui 10 669 66 +venantius/ultra 10 597 59 +IonicaBizau/git-stats 56 2896 51 +skywinder/github-changelog-generator 19 958 50 +FormidableLabs/radium 16 788 49 +amscanne/huptime 10 493 49 +anacrolix/torrent 16 782 48 +vektra/templar 14 661 47 +philipwalton/flexbugs 29 1331 45 +KeyboardFire/mkcast 70 3134 44 +labstack/echo 13 572 44 +mentum/lambdaws 22 953 43 +begriffs/postgrest 55 2312 42 +jaicab/localFont 13 557 42 +mikechau/react-primer-draft 71 3044 42 +seppo0010/rlite 16 676 42 +shipitjs/shipit 47 1976 42 +yaronn/GifW00t 13 557 42 +bevacqua/fuzzysearch 15 622 41 +jgrahamc/httpdiff 34 1399 41 +sindresorhus/chalk 16 666 41 +Yomguithereal/baobab 20 782 39 +pdcgomes/XCActionBar 25 965 38 +ko1/pretty_backtrace 14 519 37 +vim/vim 74 2746 37 +BurntSushi/xsv 10 367 36 +gizak/termui 74 2716 36 +nicklockwood/MustOverride 12 434 36 +ry/v8worker 15 551 36 +systemjs/systemjs 35 1265 36 +codesuki/react-d3-components 12 420 35 +fizx/parsley 20 717 35 +israelidanny/ie8linter 21 741 35 +YabataDesign/afterglow-theme 26 896 34 +google/yapf 22 751 34 +trello/victor 11 379 34 +ianks/octodown 15 506 33 +joewalnes/websocketd 79 2615 33 +maxogden/menubar 14 462 33 +paulirish/automated-chrome-profiling 11 367 33 +resume/resume.github.com 50 1698 33 +shellfire-dev/shellfire 28 935 33 +venmo/synx 11 372 33 +krockode/x_x 12 393 32 +pheuter/essential-react 41 1318 32 +centaurean/density 20 622 31 +kriskowal/gtor 22 697 31 +tdenniston/bish 37 1167 31 +yaronn/blessed-contrib 184 5880 31 + diff --git a/regression-test/data/variant_p2/sql/proportionsBetweenStarsAndForks3.out b/regression-test/data/variant_p2/sql/proportionsBetweenStarsAndForks3.out new file mode 100644 index 0000000000..15f702a471 --- /dev/null +++ 
b/regression-test/data/variant_p2/sql/proportionsBetweenStarsAndForks3.out @@ -0,0 +1,53 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !proportionsBetweenStarsAndForks3 -- +zzzili/zzCommonProject 6 13 0 +zzzeek/sqlalchemy 27 94 0 +zzz40500/PhotoBrowser 5 8 0 +zzz40500/HeadsUp 25 61 0 +zzz40500/GsonFormat 7 17 0 +zzyss86/tuijscom 5 16 0 +zzyss86/LunarCalendar 7 23 0 +zzrough/gs-extensions-drop-down-terminal 5 9 0 +zzmp/juliusjs 20 126 0 +zzhouj/Android-DraggableGridViewPager 10 16 0 +zzet/ansible-rbenv-role 12 23 0 +zzdboy/GoCMS 11 16 0 +zzarcon/focusable 7 38 0 +zzarbi/synology 5 9 0 +zzap/Cikonss 10 21 0 +zytzagoo/smtp-validate-email 7 8 0 +zythum/youkuhtml5playerbookmark 9 36 0 +zythum/mama2 27 218 0 +zyro/hyde-x 13 26 0 +zynga/scroller 34 588 0 +zynga/jsbindings 6 9 0 +zym2014/MingQQ 128 315 0 +zxlie/WeixinApi 186 356 0 +zxlie/FeHelper 8 20 0 +zxing/zxing 546 939 0 +zxh0/jvm.go 98 1256 0 +zxdrive/imouto.host 45 112 0 +zx2c4/password-store 15 37 0 +zwz/plantuml-mode 5 6 0 +zwopple/PocketSocket 5 14 0 +zwaldowski/BlocksKit 47 301 0 +zwacky/angular-flippy 5 6 0 +zverok/clio 7 21 0 +zutrinken/attila 8 18 0 +zurb/twentytwenty 10 37 0 +zurb/responsive-tables 6 13 0 +zurb/pizza 6 14 0 +zurb/joyride 9 30 0 +zurb/ink 39 198 0 +zurb/foundation-rails 33 46 0 +zurb/foundation-libsass-template 10 11 0 +zurb/foundation-compass-template 14 17 0 +zurb/foundation-apps-template 8 16 0 +zurb/foundation-apps 61 249 0 +zurb/foundation-5-sublime-snippets 19 51 0 +zurb/foundation 328 977 0 +zurb/bower-foundation 31 37 0 +zupet/LuaTinker 5 8 0 +zuk/jquery.inview 10 13 0 +ztianjin/BigData 5 7 0 + diff --git a/regression-test/data/variant_p2/sql/proportionsBetweenStarsAndForks4.out b/regression-test/data/variant_p2/sql/proportionsBetweenStarsAndForks4.out new file mode 100644 index 0000000000..78eef66a20 --- /dev/null +++ b/regression-test/data/variant_p2/sql/proportionsBetweenStarsAndForks4.out @@ -0,0 +1,4 @@ +-- This file is 
automatically generated. You should know what you did if you want to edit this +-- !proportionsBetweenStarsAndForks4 -- +1493710 3951085 2.65 + diff --git a/regression-test/data/variant_p2/sql/proportionsBetweenStarsAndForks5.out b/regression-test/data/variant_p2/sql/proportionsBetweenStarsAndForks5.out new file mode 100644 index 0000000000..55770e0561 --- /dev/null +++ b/regression-test/data/variant_p2/sql/proportionsBetweenStarsAndForks5.out @@ -0,0 +1,4 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !proportionsBetweenStarsAndForks5 -- +788721 2938913 3.73 + diff --git a/regression-test/data/variant_p2/sql/repositoriesWithClickhouse_related_comments1.out b/regression-test/data/variant_p2/sql/repositoriesWithClickhouse_related_comments1.out new file mode 100644 index 0000000000..0102ad866f --- /dev/null +++ b/regression-test/data/variant_p2/sql/repositoriesWithClickhouse_related_comments1.out @@ -0,0 +1,53 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !repositoriesWithClickhouse_related_comments1 -- +apache/spark 9527 +apache/incubator-brooklyn 426 +apache/incubator-reef 355 +brianchandotcom/liferay-portal 347 +owncloud/core 294 +apache/activemq-6 236 +apache/flink 126 +shuyangzhou/liferay-portal 98 +apache/storm 92 +NFLabs/zeppelin 89 +chrsmith/google-api-java-client 85 +mtambara/liferay-portal 83 +sergiogonzalez/liferay-portal 81 +jirikuncar/invenio 76 +puppetlabs/puppetlabs-apache 72 +pentestgeek/phishing-frenzy 71 +puphpet/puphpet 68 +juliocamarero/liferay-portal 64 +phusion/passenger 61 +Homebrew/homebrew 52 +apache/tajo 48 +saltstack/salt 46 +rotty3000/liferay-portal 44 +apache/cloudstack 41 +crowell/modpagespeed 41 +letsencrypt/lets-encrypt-preview 40 +ealonso/liferay-portal 38 +elasticsearch/elasticsearch 37 +meteor/meteor 36 +ManageIQ/manageiq 35 +apache/cordova-lib 34 +spark-jobserver/spark-jobserver 34 +commons-rdf/commons-rdf 32 +docker-library/php 32 +fail2ban/fail2ban 32 +shawnmckinney/apache-fortress-demo 32 +matethurzo/liferay-portal 31 +spring-projects/spring-boot 31 +intel-hadoop/gearpump 30 +owncloud/client 30 +apache/couchdb-fauxton 29 +apache/incubator-parquet-mr 29 +snipe/snipe-it 29 +GoogleCloudPlatform/kubernetes 28 +ampache/ampache 28 +joomla/joomla-cms 28 +apache/camel 27 +IQSS/dataverse 26 +openmicroscopy/openmicroscopy 26 +Shopify/sarama 25 + diff --git a/regression-test/data/variant_p2/sql/repositoriesWithClickhouse_related_comments2.out b/regression-test/data/variant_p2/sql/repositoriesWithClickhouse_related_comments2.out new file mode 100644 index 0000000000..21d9cf83d4 --- /dev/null +++ b/regression-test/data/variant_p2/sql/repositoriesWithClickhouse_related_comments2.out @@ -0,0 +1,53 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !repositoriesWithClickhouse_related_comments2 -- +facebook/react-native 10021 1 +phanan/htaccess 7964 9 +sdelements/lets-chat 5045 7 +twbs/bootstrap 4475 2 +twostairs/paperwork 3684 20 +libreboard/libreboard 3469 1 +Selz/plyr 3403 2 +wasabeef/awesome-android-ui 3225 1 +sophron/wifiphisher 3142 2 +jhauswald/sirius 3030 1 +github/gitignore 2926 1 +FortAwesome/Font-Awesome 2798 1 +driftyco/ionic 2783 6 +iluwatar/java-design-patterns 2771 2 +atom/atom 2745 3 +docker/docker 2655 17 +Semantic-Org/Semantic-UI 2488 2 +meteor/meteor 2486 36 +golang/go 2252 4 +robbyrussell/oh-my-zsh 2227 2 +google/gxui 2172 1 +shadowsocks/shadowsocks 2134 3 +facebook/stetho 2085 2 +gogits/gogs 1959 6 +joyent/node 1940 8 +reapp/reapp 1925 2 +hakimel/reveal.js 1900 1 +rust-lang/rust 1869 13 +tripit/slate 1868 1 +Automattic/socket.io 1840 2 +angular/material 1778 3 +h5bp/html5-boilerplate 1710 1 +babel/babel 1693 1 +android-cn/android-open-project-analysis 1676 1 +dotnet/roslyn 1626 19 +lodash/lodash 1585 1 +Homebrew/homebrew 1583 52 +AFNetworking/AFNetworking 1572 2 +jekyll/jekyll 1479 7 +adobe/brackets 1463 2 +ansible/ansible 1441 7 +Polymer/polymer 1384 1 +haifengl/smile 1371 1 +django/django 1369 1 +jquery/jquery 1368 6 +rails/rails 1363 8 +mitsuhiko/flask 1359 4 +balderdashy/sails 1343 4 +Unitech/PM2 1311 1 +GoogleCloudPlatform/kubernetes 1303 28 + diff --git a/regression-test/data/variant_p2/sql/repositoriesWithDoris_related_comments1.out b/regression-test/data/variant_p2/sql/repositoriesWithDoris_related_comments1.out new file mode 100644 index 0000000000..206617322f --- /dev/null +++ b/regression-test/data/variant_p2/sql/repositoriesWithDoris_related_comments1.out @@ -0,0 +1,53 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !repositoriesWithDoris_related_comments1 -- +apache/spark 15220 +amplab-extras/SparkR-pkg 289 +NFLabs/zeppelin 184 +datastax/spark-cassandra-connector 148 +spark-jobserver/spark-jobserver 146 +NREL/OpenStudio 88 +awslabs/emr-bootstrap-actions 52 +mesos/spark-ec2 50 +spring-projects/spring-xd 46 +spark/spark-cli 45 +mikesparks/OLB-Dev-Tracker 44 +perwendel/spark 44 +spark/firmware 43 +andypetrella/spark-notebook 41 +elasticsearch/elasticsearch-hadoop 41 +caskdata/cdap 40 +SparkDevNetwork/Rock 38 +vslavik/winsparkle 34 +cleanflight/cleanflight 33 +ibm-et/spark-kernel 33 +sparkle-project/Sparkle 33 +apache/mahout 30 +databricks/spark-avro 29 +ReactiveCocoa/ReactiveCocoa 28 +hbons/SparkleShare 28 +bigdatagenomics/adam 26 +libgit2/libgit2sharp 26 +Carthage/Carthage 23 +TauLabs/TauLabs 23 +databricks/spark-csv 23 +berkeley-dsc/dlab-finance 22 +sparklemotion/nokogiri 22 +yola/healthcheck 22 +sampsyo/beets 21 +ilayaperumalg/spring-xd 19 +scalanlp/breeze 19 +spark/spark-dev 19 +cluhring/the_pivot 18 +elastic/elasticsearch-hadoop 18 +mikedao/the_pivot 18 +OryxProject/oryx 17 +SparkartGroupInc/jungle-solidus 17 +tresata/spark-scalding 17 +ComputationalRadiationPhysics/picongpu 16 +KristaANelson/the_pivot 16 +atom/atom 16 +gorillalabs/sparkling 16 +robot-monkey/final-project 16 +ContinuumIO/into 14 +geotrellis/geotrellis 14 + diff --git a/regression-test/data/variant_p2/sql/repositoriesWithDoris_related_comments2.out b/regression-test/data/variant_p2/sql/repositoriesWithDoris_related_comments2.out new file mode 100644 index 0000000000..7e8ac449b6 --- /dev/null +++ b/regression-test/data/variant_p2/sql/repositoriesWithDoris_related_comments2.out @@ -0,0 +1,53 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !repositoriesWithDoris_related_comments2 -- +facebook/react-native 10021 1 +alex/what-happens-when 7407 2 +prakhar1989/awesome-courses 6110 2 +yaronn/blessed-contrib 5880 2 +tiimgreen/github-cheat-sheet 5420 1 +gorhill/uBlock 5415 2 +iojs/io.js 4866 1 +dotnet/coreclr 4049 1 +angular/angular.js 3942 1 +muut/riotjs 3244 10 +github/gitignore 2926 4 +primer/primer 2816 1 +atom/atom 2745 16 +gizak/termui 2716 2 +docker/docker 2655 5 +lukasz-madon/awesome-remote-job 2579 1 +meteor/meteor 2486 1 +robbyrussell/oh-my-zsh 2227 1 +gogits/gogs 1959 2 +rust-lang/rust 1869 4 +prometheus/prometheus 1767 1 +dotnet/roslyn 1626 4 +neovim/neovim 1621 2 +lodash/lodash 1585 2 +Homebrew/homebrew 1583 13 +amjith/pgcli 1495 1 +jekyll/jekyll 1479 5 +ansible/ansible 1441 1 +dotnet/corefx 1389 1 +django/django 1369 1 +papers-we-love/papers-we-love 1366 1 +rails/rails 1363 4 +mrdoob/three.js 1351 1 +GoogleCloudPlatform/kubernetes 1303 6 +atom/atom-shell 1285 3 +google/guava 1274 1 +rg3/youtube-dl 1272 1 +scrapy/scrapy 1241 1 +kennethreitz/requests 1236 6 +mozilla/pdf.js 1234 1 +Microsoft/TypeScript 1217 3 +facebook/pop 1127 1 +BVLC/caffe 1102 1 +ReactiveCocoa/ReactiveCocoa 1098 28 +petkaantonov/bluebird 1095 1 +creationix/nvm 1081 1 +spoike/refluxjs 1073 1 +Carthage/Carthage 1043 23 +tastejs/todomvc 1035 1 +yiisoft/yii2 1034 2 + diff --git a/regression-test/data/variant_p2/sql/repositoriesWithTheHighestGrowthYoY.out b/regression-test/data/variant_p2/sql/repositoriesWithTheHighestGrowthYoY.out new file mode 100644 index 0000000000..0ff4b6065b --- /dev/null +++ b/regression-test/data/variant_p2/sql/repositoriesWithTheHighestGrowthYoY.out @@ -0,0 +1,53 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !repositoriesWithTheHighestGrowthYoY -- +0--------------------------------------/Trivia-Crack-DNS 0 1 0 +0-1-0/lightblue-0.4 0 3 0 +0-14N/NDroid 0 2 0 +0-afflatus/grail_test 0 1 0 +0-duke/wdpassport-utils 0 1 0 +0-jake-0/jlab 0 1 0 +0/paper.js 0 1 0 +00-Evan/PD-classes 0 2 0 +00-Evan/shattered-pixel-dungeon 0 22 0 +00-Evan/shattered-pixel-dungeon-gdx 0 1 0 +00/wikihouse 0 1 0 +00/wikihouse-controls 0 2 0 +000fan000/d3-cloud 0 1 0 +00100100/ElohimSolver4 0 1 0 +001295039/SSBST 0 1 0 +002301/SequenceFrames 0 2 0 +00450681/AndroidANCSNotification 0 2 0 +007-surajit/Directory-List-PhoneGap-Plugin 0 1 0 +007/hashcash-js 0 1 0 +007bonds/SequoiaDB 0 1 0 +007lva/gulp-jade-sass-6to5 0 3 0 +007lva/mallampati-rest 0 1 0 +007rahulraman/flask_fb_app 0 1 0 +007shaoye/007 0 1 0 +0088FF/get.js 0 5 0 +0088FF/turkoz.me 0 5 0 +00Craft/GrandTheftCraft 0 1 0 +00Rez/c2 0 1 0 +00StevenG/NSDictionary-ImageMetadata 0 4 0 +00StevenG/NSString-Japanese 0 6 0 +00StevenG/UITextViewExtras 0 4 0 +00benallen/TrashSmash 0 1 0 +00buggy00/SwiftOpenGL 0 2 0 +00conan00/JLex 0 1 0 +00erik2/OperatingSystemsHomework 0 1 0 +00krishna/gdelt_download 0 1 0 +00nanhai/Lcc 0 1 0 +01000101/DataRaptor 0 1 0 +01000101/insteon-75790-recorder 0 1 0 +01000101/onedrive-linux-client 0 2 0 +0101/pipetools 0 2 0 +01010101/GML4U 0 1 0 +0105time/AlloyRenderingEngine 0 1 0 +0109yuma/camera-app-swift 0 1 0 +010blue/oldcms 0 2 0 +0110/WookieController 0 1 0 +0110/wifikeyboard 0 1 0 +01271/OpenEndgame 0 1 0 +013/Soundcloud-Downloader 0 12 0 +01AutoMonkey/open.gl-tutorials-to-pyglet 0 2 0 + diff --git a/regression-test/data/variant_p2/sql/repositoriesWithTheMaximumAmountOfIssues1.out b/regression-test/data/variant_p2/sql/repositoriesWithTheMaximumAmountOfIssues1.out new file mode 100644 index 0000000000..90778d85dc --- /dev/null +++ b/regression-test/data/variant_p2/sql/repositoriesWithTheMaximumAmountOfIssues1.out @@ -0,0 +1,53 @@ +-- This file is automatically 
generated. You should know what you did if you want to edit this +-- !repositoriesWithTheMaximumAmountOfIssues1 -- +No-CQRT/GooGuns 24737 1 +chrsmith/google-api-java-client 10568 1 +Khan/khan-i18n 8940 1 +chrsmith/bwapi 6258 1 +chrsmith/nishazi6 4877 1 +LeaVerou/awesomplete 4608 748 +chrsmith/html5rocks 4395 2 +huntermcmillian/huntermcmillian 3175 1 +chrsmith/open-ig 2516 1 +chrsmith/reaver-wps 2300 1 +djbouche/glowing-bear 2088 1 +andresriancho/w3af 1995 21 +sbezborotest/test 1989 5 +spyder-ide/spyder 1848 49 +BOINC/boinc 1754 6 +pedromorgan/flightgear-issues-test 1750 1 +Atlantiss/BugTracker 1665 501 +pychess/pychess 1658 4 +rust-lang/rust 1648 589 +sphinx-doc/testing2 1598 1 +chrsmith/hedgewars 1447 1 +wbish/IronPath 1309 1 +TWtablero/repoTest1 1306 5 +owncloud/core 1299 577 +kewinrausch/Test 1293 1 +pzia/vlm 1244 1 +LeaVerou/prefixfree 1226 106 +BOINC/boinc-ng 1223 1 +joyent/node 1186 289 +BALL-Project/ball 1162 4 +ConEmu/old-issues 1144 1 +chrsmith/blogger-ftp-migration-tracker 1133 1 +v-l-m/vlm 1088 5 +atom/atom 1074 779 +GoogleCloudPlatform/kubernetes 1048 197 +Virtual-TreeView/Virtual-TreeView 1037 10 +phalcon/cphalcon 1014 170 +fourq/hackball 1000 1 +chrsmith/dsdsdaadf 991 1 +chrsmith/jsjsj122 991 1 +dotnet/roslyn 962 209 +harishamdani/TestRepo 960 1 +saltstack/salt 958 448 +chrsmith/mo-1 948 1 +chrsmithdemos/google-api-java-client 923 1 +chrsmithdemos/open-ig 917 1 +chrsmithdemos/html5rocks 911 1 +chrsmith/scribefire-chrome 886 1 +CartoDB/cartodb 884 55 +docker/docker 873 524 + diff --git a/regression-test/data/variant_p2/sql/repositoriesWithTheMaximumAmountOfIssues2.out b/regression-test/data/variant_p2/sql/repositoriesWithTheMaximumAmountOfIssues2.out new file mode 100644 index 0000000000..4f8281d36d --- /dev/null +++ b/regression-test/data/variant_p2/sql/repositoriesWithTheMaximumAmountOfIssues2.out @@ -0,0 +1,53 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !repositoriesWithTheMaximumAmountOfIssues2 -- +No-CQRT/GooGuns 24737 1 0 +chrsmith/google-api-java-client 10568 1 0 +Khan/khan-i18n 8940 1 2 +chrsmith/bwapi 6258 1 0 +chrsmith/nishazi6 4877 1 0 +LeaVerou/awesomplete 4608 748 3390 +chrsmith/html5rocks 4395 2 0 +huntermcmillian/huntermcmillian 3175 1 1 +chrsmith/open-ig 2516 1 0 +chrsmith/reaver-wps 2300 1 0 +djbouche/glowing-bear 2088 1 0 +andresriancho/w3af 1995 21 68 +sbezborotest/test 1989 5 0 +spyder-ide/spyder 1848 49 175 +BOINC/boinc 1754 6 44 +pedromorgan/flightgear-issues-test 1750 1 0 +Atlantiss/BugTracker 1665 501 52 +pychess/pychess 1658 4 24 +rust-lang/rust 1648 589 1869 +sphinx-doc/testing2 1598 1 0 +chrsmith/hedgewars 1447 1 0 +wbish/IronPath 1309 1 0 +TWtablero/repoTest1 1306 5 0 +owncloud/core 1299 577 360 +kewinrausch/Test 1293 1 0 +pzia/vlm 1244 1 0 +LeaVerou/prefixfree 1226 106 144 +BOINC/boinc-ng 1223 1 0 +joyent/node 1186 289 1940 +BALL-Project/ball 1162 4 1 +ConEmu/old-issues 1144 1 0 +chrsmith/blogger-ftp-migration-tracker 1133 1 0 +v-l-m/vlm 1088 5 7 +atom/atom 1074 779 2745 +GoogleCloudPlatform/kubernetes 1048 197 1303 +Virtual-TreeView/Virtual-TreeView 1037 10 27 +phalcon/cphalcon 1014 170 592 +fourq/hackball 1000 1 0 +chrsmith/dsdsdaadf 991 1 0 +chrsmith/jsjsj122 991 1 0 +dotnet/roslyn 962 209 1626 +harishamdani/TestRepo 960 1 0 +saltstack/salt 958 448 469 +chrsmith/mo-1 948 1 0 +chrsmithdemos/google-api-java-client 923 1 0 +chrsmithdemos/open-ig 917 1 0 +chrsmithdemos/html5rocks 911 1 0 +chrsmith/scribefire-chrome 886 1 0 +CartoDB/cartodb 884 55 102 +docker/docker 873 524 2655 + diff --git a/regression-test/data/variant_p2/sql/repositoriesWithTheMaximumAmountOfIssues3.out b/regression-test/data/variant_p2/sql/repositoriesWithTheMaximumAmountOfIssues3.out new file mode 100644 index 0000000000..8ee41ec721 --- /dev/null +++ b/regression-test/data/variant_p2/sql/repositoriesWithTheMaximumAmountOfIssues3.out @@ -0,0 +1,53 @@ +-- This 
file is automatically generated. You should know what you did if you want to edit this +-- !repositoriesWithTheMaximumAmountOfIssues3 -- +github/gitignore 0 0 2926 +torvalds/linux 0 0 2907 +thoughtbot/til 0 0 2769 +laravel/laravel 0 0 2056 +goagent/goagent 0 0 1912 +resume/resume.github.com 0 0 1698 +django/django 0 0 1369 +MaciejCzyzewski/retter 0 0 1245 +discourse/discourse 0 0 1172 +bltavares/Knoodou 0 0 1157 +blueimp/jQuery-File-Upload 0 0 1091 +hakimel/css 0 0 1073 +angrave/SystemProgramming 0 0 1028 +apache/spark 0 0 1028 +jez/vim-as-an-ide 0 0 987 +simple-android-framework-exchange/android_design_patterns_analysis 0 0 888 +git/git 0 0 878 +Itseez/opencv 0 0 847 +wsargent/docker-cheat-sheet 0 0 835 +lgvalle/Material-Animations 0 0 810 +cdarwin/go-koans 0 0 767 +fcambus/nginx-resources 0 0 719 +ruby/ruby 0 0 690 +nathanpeck/clui 0 0 669 +owainlewis/awesome-artificial-intelligence 0 0 662 +vektra/templar 0 0 661 +qrpike/Web-Font-Load 0 0 656 +spring-projects/spring-framework 0 0 650 +mongodb/mongo 0 0 632 +Naituw/WBWebViewConsole 0 0 612 +php/php-src 0 0 607 +anicollection/anicollection 0 0 605 +geekcompany/ResumeSample 0 0 573 +yaronn/GifW00t 0 0 557 +WordPress/WordPress 0 0 534 +chrisbanes/Android-PullToRefresh 0 0 534 +spasmilo/electrum 0 0 533 +clojure/clojurescript 0 0 520 +oli107/material-range-bar 0 0 456 +bup/bup 0 0 453 +LukeLin/data-structure-with-js 0 0 439 +sparanoid/chinese-copywriting-guidelines 0 0 439 +chriskempson/base16 0 0 432 +keefo/CATweaker 0 0 430 +vandadnp/swift-weekly 0 0 410 +mafintosh/airpaste 0 0 409 +johnno1962/Remote 0 0 403 +aosp-exchange-group/share 0 0 402 +nemoTyrant/manong 0 0 398 +apache/storm 0 0 396 + diff --git a/regression-test/data/variant_p2/sql/repositoriesWithTheMaximumAmountOfIssues4.out b/regression-test/data/variant_p2/sql/repositoriesWithTheMaximumAmountOfIssues4.out new file mode 100644 index 0000000000..623c4971ec --- /dev/null +++ 
b/regression-test/data/variant_p2/sql/repositoriesWithTheMaximumAmountOfIssues4.out @@ -0,0 +1,53 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !repositoriesWithTheMaximumAmountOfIssues4 -- +github/gitignore 0 0 2926 +torvalds/linux 0 0 2907 +thoughtbot/til 0 0 2769 +laravel/laravel 0 0 2056 +goagent/goagent 0 0 1912 +resume/resume.github.com 0 0 1698 +django/django 0 0 1369 +MaciejCzyzewski/retter 0 0 1245 +discourse/discourse 0 0 1172 +bltavares/Knoodou 0 0 1157 +blueimp/jQuery-File-Upload 0 0 1091 +hakimel/css 0 0 1073 +angrave/SystemProgramming 0 0 1028 +apache/spark 0 0 1028 +jez/vim-as-an-ide 0 0 987 +simple-android-framework-exchange/android_design_patterns_analysis 0 0 888 +git/git 0 0 878 +Itseez/opencv 0 0 847 +wsargent/docker-cheat-sheet 0 0 835 +lgvalle/Material-Animations 0 0 810 +cdarwin/go-koans 0 0 767 +fcambus/nginx-resources 0 0 719 +ruby/ruby 0 0 690 +nathanpeck/clui 0 0 669 +owainlewis/awesome-artificial-intelligence 0 0 662 +vektra/templar 0 0 661 +qrpike/Web-Font-Load 0 0 656 +spring-projects/spring-framework 0 0 650 +mongodb/mongo 0 0 632 +Naituw/WBWebViewConsole 0 0 612 +php/php-src 0 0 607 +anicollection/anicollection 0 0 605 +geekcompany/ResumeSample 0 0 573 +yaronn/GifW00t 0 0 557 +WordPress/WordPress 0 0 534 +chrisbanes/Android-PullToRefresh 0 0 534 +spasmilo/electrum 0 0 533 +clojure/clojurescript 0 0 520 +oli107/material-range-bar 0 0 456 +bup/bup 0 0 453 +LukeLin/data-structure-with-js 0 0 439 +sparanoid/chinese-copywriting-guidelines 0 0 439 +chriskempson/base16 0 0 432 +keefo/CATweaker 0 0 430 +vandadnp/swift-weekly 0 0 410 +mafintosh/airpaste 0 0 409 +johnno1962/Remote 0 0 403 +aosp-exchange-group/share 0 0 402 +nemoTyrant/manong 0 0 398 +apache/storm 0 0 396 + diff --git a/regression-test/data/variant_p2/sql/repositoriesWithTheMaximumAmountOfPullRequests1.out b/regression-test/data/variant_p2/sql/repositoriesWithTheMaximumAmountOfPullRequests1.out new file mode 100644 index 
0000000000..b6838888c8 --- /dev/null +++ b/regression-test/data/variant_p2/sql/repositoriesWithTheMaximumAmountOfPullRequests1.out @@ -0,0 +1,53 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !repositoriesWithTheMaximumAmountOfPullRequests1 -- +0----0/language-c 1 1 +0-1-0/lightblue-0.4 1 1 +0-Byte/hello-world 1 1 +0-F/chocolatey-package 1 1 +0-IceShard-0/Jeffery-the-space-whale 1 1 +0003088/libelektra 1 1 +000axa-MUSH/shish 1 1 +003moonj/hello-world 1 1 +00Joe/MessageSender 1 1 +00Zeb/SimpleChess 1 1 +00kenshin/hexo 1 1 +00lito/MyExcercises 1 1 +01org/Galileo-Runtime 1 1 +01org/KVMGT-kernel 1 1 +01org/cordova-google-play-games-services 1 1 +01org/dleyna-connector-dbus 1 1 +01org/dleyna-renderer 1 1 +01org/dleyna-server 1 1 +01org/jndn-utils 1 1 +01org/luv-yocto 1 1 +01org/mic 1 1 +01org/parameter-framework-plugins-alsa 1 1 +01org/parameter-framework-samples 1 1 +01org/thermal_daemon 1 1 +02501jm/hello-world 1 1 +02strich/django-auth-kerberos 1 1 +0520978/hello-world 1 1 +0532/shuikong 1 1 +0532/spring 1 1 +05BIT008/rails 1 1 +080419android/droidCal 1 1 +0933322888/Kv-007.ecomap-backend 1 1 +0933322888/hello-world 1 1 +096-NET-UA/android-1 1 1 +096acc/oneironauts 1 1 +09terp/inwit.io 1 1 +0ADMods/eyecandy 1 1 +0BS0L33T/hello-world 1 1 +0Matt/Hello-World 1 1 +0X1A/yabs 1 1 +0aps/CafeteriaApp 1 1 +0asa/awesome-python 1 1 +0atman/0atman.com 1 1 +0atman/web-style-guide 1 1 +0dataloss/pyrax 1 1 +0ddfell0w/ACM-NYU 1 1 +0dp/generator-wp-bones 1 1 +0install/0install 1 1 +0install/repo.roscidus.com 1 1 +0intro/plan9 1 1 + diff --git a/regression-test/data/variant_p2/sql/repositoriesWithTheMaximumAmountOfPullRequests2.out b/regression-test/data/variant_p2/sql/repositoriesWithTheMaximumAmountOfPullRequests2.out new file mode 100644 index 0000000000..82e5a50926 --- /dev/null +++ b/regression-test/data/variant_p2/sql/repositoriesWithTheMaximumAmountOfPullRequests2.out @@ -0,0 +1,53 @@ +-- This file is automatically 
generated. You should know what you did if you want to edit this +-- !repositoriesWithTheMaximumAmountOfPullRequests2 -- +jlord/patchwork 1226 1064 +deadlyvipers/dojo_rules 1520 997 +octocat/Spoon-Knife 976 910 +Homebrew/homebrew 2194 691 +udacity/create-your-own-adventure 622 507 +caskroom/homebrew-cask 1440 442 +rails/rails 839 359 +baidu-ife/ife 381 347 +rust-lang/rust 1713 337 +apache/spark 1359 296 +laravel/framework 694 293 +docker/docker 1128 287 +LarryMad/recipes 293 268 +borisyankov/DefinitelyTyped 508 265 +saltstack/salt 1795 249 +NixOS/nixpkgs 1133 247 +JetBrains/swot 273 242 +wbond/package_control_channel 301 238 +rdpeng/ProgrammingAssignment2 243 224 +Azure/azure-content 784 217 +iloveponies/training-day 218 214 +cms-sw/cmssw 1485 213 +odoo/odoo 933 213 +django/django 578 196 +GoogleCloudPlatform/kubernetes 1793 194 +laravel/docs 282 191 +angular/angular.js 334 188 +mozilla-b2g/gaia 2113 182 +symfony/symfony 614 175 +yiisoft/yii2 427 174 +iloveponies/i-am-a-horse-in-the-land-of-booleans 171 171 +ansible/ansible 297 170 +facebook/react 377 164 +TheOdinProject/curriculum 530 161 +cdnjs/cdnjs 294 161 +milkypostman/melpa 262 161 +ansible/ansible-modules-core 281 160 +leereilly/swot 199 158 +ga-students/wdi-fundamentals-rps 154 147 +iojs/io.js 570 136 +rcos/rcos-projects 170 134 +jsdelivr/jsdelivr 1147 127 +robbyrussell/oh-my-zsh 170 124 +joomla/joomla-cms 587 119 +ceph/ceph 909 115 +JuliaLang/METADATA.jl 422 115 +cocos2d/cocos2d-x 972 114 +michaelliao/learngit 123 114 +cakephp/docs 348 112 +ember-cli/ember-cli 376 111 + diff --git a/regression-test/data/variant_p2/sql/repositoriesWithTheMaximumNumberOfAcceptedInvitations.out b/regression-test/data/variant_p2/sql/repositoriesWithTheMaximumNumberOfAcceptedInvitations.out new file mode 100644 index 0000000000..37e8e052de --- /dev/null +++ b/regression-test/data/variant_p2/sql/repositoriesWithTheMaximumNumberOfAcceptedInvitations.out @@ -0,0 +1,53 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !repositoriesWithTheMaximumNumberOfAcceptedInvitations -- +cs428TAs/cs428 76 5 +5harad/crowds 69 8 +chrisvfritz/tc359_cheatsheets 56 2 +madmilla/THO78-Roborescue 47 2 +egingric/2015-Racing-Game 45 2 +anrom7/NLP_with_Python 38 2 +rsanchez-wsu/sp15-ceg3120 29 9 +projbdic32/bdi-c32 24 9 +nadiagarcia/EDA-ITESO-2015 24 5 +csteacherd22/preAPCS-4th-1415 23 7 +profwpollock/COP-2805-2015 23 4 +limingth/meteor-lean-startup-camp 22 18 +raquel-oliveira/historinha 22 2 +chusiang/irc_log 21 4 +rodrigogrow/GameHistoryTimeline 20 12 +JohnJvanLoon/AVRTCPIP 20 9 +amchristi/cs362s15 20 4 +gamondue/GOR5F 20 2 +wgdomenzain/UAG.IDS.8010.2015.1 20 2 +cretchas/projeto_es_2015_1 19 3 +hoenirvili/EDeC 19 3 +snags88/euler-club 19 3 +mskubenich/swan_rc3 18 8 +bennieandthejets/SoftwareEngineering 18 6 +highya/ztb-leetcode 18 5 +EScommander/Norco-GAM-79-Spr2015-CartGame 18 4 +asbezrukov/tvp-lab8 17 6 +RShankar/Senior-Seminar-on-Social-Web 17 5 +wgdomenzain/UAG.IDS.5010.2015.1 17 5 +JJjie/HIT-CSDN-WEB 17 3 +brainix/basilisk 16 5 +FcoMondro/ProgramacionWeb6D 16 2 +annamel/acroparallels2015 16 2 +Kentverger/wrk 15 16 +zachmay/inventory616 15 4 +JamesMuir/Creme_eggs 15 3 +MahikanthNag/JAVAians 15 3 +hieuletrung/iot-alljoyn 15 3 +rellermeyer/course_os 15 3 +ghaida/gdisjc-website 15 2 +polynquintela/CMSC128Project7L 15 2 +FreeCodeCamp/freecodecamp 14 123 +iojs/iojs-es 14 43 +rubis-lab/Vehicle-Dynamics-Simulator 14 10 +jedepaepe/PeoplePicker 14 7 +mohammadKarimi/Bellatrix.OneSpace 14 4 +NielsRavn/AcmeInc 14 3 +gokcer/atilim-comodo-java-egitim 14 2 +jafuentest/totem 14 2 +joemahmah/2022-frc-2015 14 2 + diff --git a/regression-test/data/variant_p2/sql/repositoriesWithTheMostPeopleWhoHavePushAccess1.out b/regression-test/data/variant_p2/sql/repositoriesWithTheMostPeopleWhoHavePushAccess1.out new file mode 100644 index 0000000000..aebd7fe84f --- /dev/null +++ b/regression-test/data/variant_p2/sql/repositoriesWithTheMostPeopleWhoHavePushAccess1.out 
@@ -0,0 +1,53 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !repositoriesWithTheMostPeopleWhoHavePushAccess1 -- +zzzsochi/trans 0 1 +zzzombat/lucid-python-swftools 0 1 +zzzili/json2object 0 1 +zzzeek/sqlalchemy_akiban 0 1 +zzzeek/ibm-db-sa 0 1 +zzz654321/plyr 0 1 +zzz6519003/igame 0 1 +zzz40500/wanikani-for-android 0 1 +zzz40500/textdrawable 0 1 +zzz40500/soundrecorder 0 1 +zzz40500/progressbar 0 1 +zzz40500/pinned-section-listview 0 1 +zzz40500/inboxlayout 0 1 +zzyss86/zepto-learn 0 1 +zzyss86/smarthosts 0 1 +zzyss86/less-css3 0 1 +zzxuanran/web-appplication-architecture 0 1 +zzx2856/eno 0 1 +zzuminy/sky 0 1 +zzuli4519/materialdesignlibrary 0 1 +zzuli4519/ldrawer 0 1 +zzuli4519/dragtoplayout 0 1 +zzuhan/jquery-zclip 0 1 +zztalker/flskzakupki.gov.ru 0 1 +zzsme/thinksns_v2.8 0 1 +zzqxztc/the-one-pitt 0 1 +zzqxztc/socialnetwork-one- 0 1 +zzqiltw/trip-to-ios 0 1 +zzpoik/themelessfortypecho 0 1 +zzongaly/fasttap 0 1 +zzolo/plex_additions 0 1 +zzo/testablejs 0 1 +zznate/usergrid-rest-apigee-sample 0 1 +zzmp/redditext 0 1 +zzmp/freaq 0 1 +zzmom/android-babydays 0 1 +zzmfish/tvbrowser 0 1 +zzmfish/miboxlauncher 0 1 +zzm317/multicraft-1 0 1 +zzm317/capture-the-flag 0 1 +zzlui/zzlbox 0 1 +zzltjnh/kwpopoverviewdemo 0 1 +zzltjnh/jigsaw 0 1 +zzl5221281994/freenos 0 1 +zzl0/dparkintro 0 1 +zzkt/remembrance-agent 0 1 +zzkt/osc 0 1 +zzjhons/zravianx 0 1 +zzjhons/izariam 0 1 +zzjhons/civclicker 0 1 + diff --git a/regression-test/data/variant_p2/sql/repositoriesWithTheMostPeopleWhoHavePushAccess2.out b/regression-test/data/variant_p2/sql/repositoriesWithTheMostPeopleWhoHavePushAccess2.out new file mode 100644 index 0000000000..55b1335c1e --- /dev/null +++ b/regression-test/data/variant_p2/sql/repositoriesWithTheMostPeopleWhoHavePushAccess2.out @@ -0,0 +1,53 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !repositoriesWithTheMostPeopleWhoHavePushAccess2 -- +zzzzzzzzzzzzzzzzzzzzzzzzzzz/project 0 0 +zzzzsong/testup 0 2 +zzzzrrr/porc 0 2 +zzzzBov/oAuthTwitterWrapper 0 0 +zzzzBov/npm-expansions 0 0 +zzzsochi/trans 0 1 +zzzsochi/ssh-authorizer 0 2 +zzzombat/lucid-python-swftools 0 1 +zzzmanzzz/examples 0 0 +zzzili/zzCommonProject 0 13 +zzzili/Json2Object 0 1 +zzzeek/sqlalchemy_akiban 0 1 +zzzeek/ibm-db-sa 0 1 +zzzeek/PyMySQL 0 0 +zzzTNTzzz/SingleCurrencyStorage 0 0 +zzz654321/plyr 0 1 +zzz6519003/socket 0 0 +zzz6519003/iGame 0 1 +zzz6519003/Yidian 0 0 +zzz6519003/Kailin-new-year 0 0 +zzz40500/pinned-section-listview 0 1 +zzz40500/WaniKani-for-Android 0 1 +zzz40500/TextDrawable 0 1 +zzz40500/SoundRecorder 0 1 +zzz40500/ProgressBar 0 1 +zzz40500/InboxLayout 0 1 +zzyss86/zepto-learn 0 1 +zzyss86/tuijs-blog 0 2 +zzyss86/Smarthosts 0 1 +zzyss86/LunarCalendar 0 23 +zzyss86/LESS-CSS3 0 1 +zzyjan/MercuryProject1 0 0 +zzyfisherman/moose 0 0 +zzy852/xunai.io 0 0 +zzxuanran/Web-Appplication-Architecture 0 1 +zzxjoanw/jquery-ui 0 0 +zzxi/c 0 0 +zzx2856/eno 0 1 +zzwqq/06 0 0 +zzw3239/fuck-2014-flirt-2015 0 0 +zzuwzj/QRCodeDemo 0 0 +zzuutt/thelia 0 0 +zzuutt/Paypal 0 0 +zzuutt/Atos 0 0 +zzuminy/sky 0 1 +zzuli4519/MaterialDesignLibrary 0 1 +zzuli4519/LDrawer 0 1 +zzuli4519/DragTopLayout 0 1 +zzuhan/jquery-zclip 0 1 +zzttyy413841467/homework2 0 0 + diff --git a/regression-test/data/variant_p2/sql/repositoriesWithTheMostPeopleWhoHavePushAccess3.out b/regression-test/data/variant_p2/sql/repositoriesWithTheMostPeopleWhoHavePushAccess3.out new file mode 100644 index 0000000000..9dd30f4455 --- /dev/null +++ b/regression-test/data/variant_p2/sql/repositoriesWithTheMostPeopleWhoHavePushAccess3.out @@ -0,0 +1,53 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !repositoriesWithTheMostPeopleWhoHavePushAccess3 -- +edx/edx-platform 62 205 +NixOS/nixpkgs 53 133 +aosp-exchange-group/fuck-2014-flirt-2015 45 158 +GoogleCloudPlatform/kubernetes 37 1303 +dotnet/roslyn 36 1626 +guardian/frontend 36 176 +mongodb/mongo 35 632 +JuliaLang/julia 32 517 +dotnet/corefx 30 1389 +neo4j/neo4j 29 227 +xbmc/xbmc 28 283 +elasticsearch/elasticsearch 27 899 +odoo/odoo 26 662 +facebook/rocksdb 25 354 +puppetlabs/puppet 25 282 +elastic/elasticsearch 24 369 +rails/rails 24 1363 +dotnet/coreclr 23 4049 +mono/mono 22 374 +angular/angular 21 1636 +google/trace-viewer 21 171 +JetBrains/kotlin 20 320 +aosp-exchange-group/share 20 402 +ceph/ceph 20 161 +rapid7/metasploit-framework 20 450 +telerik/kendo-ui-core 20 210 +docker/docker 19 2655 +reddit/reddit 19 608 +thoughtbot/guides 19 515 +CartoDB/cartodb 18 102 +mono/monodevelop 18 103 +owncloud/core 18 360 +chef/chef 17 182 +Microsoft/TypeScript 16 1217 +django/django 16 1369 +thoughtbot/hound 16 148 +thoughtbot/til 16 2769 +Azure/azure-content 15 112 +facebook/react 15 6762 +grpc/grpc 15 2313 +h2oai/h2o 15 199 +adobe/brackets 14 1463 +cockroachdb/cockroach 13 679 +emberjs/ember.js 13 1132 +jenkinsci/jenkins 13 396 +dolphin-emu/dolphin 12 344 +ember-cli/ember-cli 12 470 +openframeworks/openFrameworks 12 211 +openvswitch/ovs 12 128 +raspberrypi/documentation 12 113 + diff --git a/regression-test/data/variant_p2/sql/repositoriesWithTheMostStarsOverOneDay1.out b/regression-test/data/variant_p2/sql/repositoriesWithTheMostStarsOverOneDay1.out new file mode 100644 index 0000000000..ca26ff418a --- /dev/null +++ b/regression-test/data/variant_p2/sql/repositoriesWithTheMostStarsOverOneDay1.out @@ -0,0 +1,53 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !repositoriesWithTheMostStarsOverOneDay1 -- +facebook/react-native 10021 +phanan/htaccess 7964 +alex/what-happens-when 7407 +facebook/react 6762 +moklick/frontend-stuff 6528 +prakhar1989/awesome-courses 6110 +yaronn/blessed-contrib 5880 +arasatasaygin/is.js 5498 +vhf/free-programming-books 5437 +tiimgreen/github-cheat-sheet 5420 +gorhill/uBlock 5415 +Flipboard/react-canvas 5187 +h5bp/Front-end-Developer-Interview-Questions 5177 +sdelements/lets-chat 5045 +iojs/io.js 4866 +bendc/frontend-guidelines 4696 +twbs/bootstrap 4475 +getify/You-Dont-Know-JS 4344 +0xAX/linux-insides 4220 +IanLunn/Hover 4102 +dotnet/coreclr 4049 +daniel-lundin/snabbt.js 3996 +angular/angular.js 3942 +Mango/slideout 3866 +twostairs/paperwork 3684 +Dogfalo/materialize 3664 +libreboard/libreboard 3469 +mbostock/d3 3422 +Selz/plyr 3403 +USArmyResearchLab/Dshell 3401 +LeaVerou/awesomplete 3390 +MengTo/Spring 3276 +muut/riotjs 3244 +wasabeef/awesome-android-ui 3225 +airbnb/javascript 3155 +sophron/wifiphisher 3142 +KeyboardFire/mkcast 3134 +google/fonts 3122 +mikechau/react-primer-draft 3044 +jhauswald/sirius 3030 +sbstjn/timesheet.js 3005 +github/gitignore 2926 +torvalds/linux 2907 +IonicaBizau/git-stats 2896 +googlesamples/android-UniversalMusicPlayer 2838 +nwjs/nw.js 2829 +primer/primer 2816 +lukehoban/es6features 2810 +FortAwesome/Font-Awesome 2798 +driftyco/ionic 2783 + diff --git a/regression-test/data/variant_p2/sql/repositoriesWithTheMostStarsOverOneDay2.out b/regression-test/data/variant_p2/sql/repositoriesWithTheMostStarsOverOneDay2.out new file mode 100644 index 0000000000..a4be8b85f1 --- /dev/null +++ b/regression-test/data/variant_p2/sql/repositoriesWithTheMostStarsOverOneDay2.out @@ -0,0 +1,53 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !repositoriesWithTheMostStarsOverOneDay2 -- +facebook/react-native 10021 +phanan/htaccess 7964 +alex/what-happens-when 7407 +facebook/react 6762 +moklick/frontend-stuff 6528 +prakhar1989/awesome-courses 6110 +yaronn/blessed-contrib 5880 +arasatasaygin/is.js 5498 +vhf/free-programming-books 5437 +tiimgreen/github-cheat-sheet 5420 +gorhill/uBlock 5415 +Flipboard/react-canvas 5187 +h5bp/Front-end-Developer-Interview-Questions 5177 +sdelements/lets-chat 5045 +iojs/io.js 4866 +bendc/frontend-guidelines 4696 +twbs/bootstrap 4475 +getify/You-Dont-Know-JS 4344 +0xAX/linux-insides 4220 +IanLunn/Hover 4102 +dotnet/coreclr 4049 +daniel-lundin/snabbt.js 3996 +angular/angular.js 3942 +Mango/slideout 3866 +twostairs/paperwork 3684 +Dogfalo/materialize 3664 +libreboard/libreboard 3469 +mbostock/d3 3422 +Selz/plyr 3403 +USArmyResearchLab/Dshell 3401 +LeaVerou/awesomplete 3390 +MengTo/Spring 3276 +muut/riotjs 3244 +wasabeef/awesome-android-ui 3225 +airbnb/javascript 3155 +sophron/wifiphisher 3142 +KeyboardFire/mkcast 3134 +google/fonts 3122 +mikechau/react-primer-draft 3044 +jhauswald/sirius 3030 +sbstjn/timesheet.js 3005 +github/gitignore 2926 +torvalds/linux 2907 +IonicaBizau/git-stats 2896 +googlesamples/android-UniversalMusicPlayer 2838 +nwjs/nw.js 2829 +primer/primer 2816 +lukehoban/es6features 2810 +FortAwesome/Font-Awesome 2798 +driftyco/ionic 2783 + diff --git a/regression-test/data/variant_p2/sql/repositoriesWithTheMostStarsOverOneDay3.out b/regression-test/data/variant_p2/sql/repositoriesWithTheMostStarsOverOneDay3.out new file mode 100644 index 0000000000..c97893738c --- /dev/null +++ b/regression-test/data/variant_p2/sql/repositoriesWithTheMostStarsOverOneDay3.out @@ -0,0 +1,53 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !repositoriesWithTheMostStarsOverOneDay3 -- +facebook/react-native 10021 +phanan/htaccess 7964 +alex/what-happens-when 7407 +facebook/react 6762 +moklick/frontend-stuff 6528 +prakhar1989/awesome-courses 6110 +yaronn/blessed-contrib 5880 +arasatasaygin/is.js 5498 +vhf/free-programming-books 5437 +tiimgreen/github-cheat-sheet 5420 +gorhill/uBlock 5415 +Flipboard/react-canvas 5187 +h5bp/Front-end-Developer-Interview-Questions 5177 +sdelements/lets-chat 5045 +iojs/io.js 4866 +bendc/frontend-guidelines 4696 +twbs/bootstrap 4475 +getify/You-Dont-Know-JS 4344 +0xAX/linux-insides 4220 +IanLunn/Hover 4102 +dotnet/coreclr 4049 +daniel-lundin/snabbt.js 3996 +angular/angular.js 3942 +Mango/slideout 3866 +twostairs/paperwork 3684 +Dogfalo/materialize 3664 +libreboard/libreboard 3469 +mbostock/d3 3422 +Selz/plyr 3403 +USArmyResearchLab/Dshell 3401 +LeaVerou/awesomplete 3390 +MengTo/Spring 3276 +muut/riotjs 3244 +wasabeef/awesome-android-ui 3225 +airbnb/javascript 3155 +sophron/wifiphisher 3142 +KeyboardFire/mkcast 3134 +google/fonts 3122 +mikechau/react-primer-draft 3044 +jhauswald/sirius 3030 +sbstjn/timesheet.js 3005 +github/gitignore 2926 +torvalds/linux 2907 +IonicaBizau/git-stats 2896 +googlesamples/android-UniversalMusicPlayer 2838 +nwjs/nw.js 2829 +primer/primer 2816 +lukehoban/es6features 2810 +FortAwesome/Font-Awesome 2798 +driftyco/ionic 2783 + diff --git a/regression-test/data/variant_p2/sql/repositoriesWithTheMostSteadyGrowthOverTime.out b/regression-test/data/variant_p2/sql/repositoriesWithTheMostSteadyGrowthOverTime.out new file mode 100644 index 0000000000..c5f81b28c0 --- /dev/null +++ b/regression-test/data/variant_p2/sql/repositoriesWithTheMostSteadyGrowthOverTime.out @@ -0,0 +1,53 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !repositoriesWithTheMostSteadyGrowthOverTime -- +0--------------------------------------/Trivia-Crack-DNS 1 1 1 +0-1-0/lightblue-0.4 3 3 1 +0-14N/NDroid 2 2 1 +0-afflatus/grail_test 1 1 1 +0-duke/wdpassport-utils 1 1 1 +0-jake-0/jlab 1 1 1 +0/paper.js 1 1 1 +00-Evan/PD-classes 2 2 1 +00-Evan/shattered-pixel-dungeon 22 22 1 +00-Evan/shattered-pixel-dungeon-gdx 1 1 1 +00/wikihouse 1 1 1 +00/wikihouse-controls 2 2 1 +000fan000/d3-cloud 1 1 1 +00100100/ElohimSolver4 1 1 1 +001295039/SSBST 1 1 1 +002301/SequenceFrames 2 2 1 +00450681/AndroidANCSNotification 2 2 1 +007-surajit/Directory-List-PhoneGap-Plugin 1 1 1 +007/hashcash-js 1 1 1 +007bonds/SequoiaDB 1 1 1 +007lva/gulp-jade-sass-6to5 3 3 1 +007lva/mallampati-rest 1 1 1 +007rahulraman/flask_fb_app 1 1 1 +007shaoye/007 1 1 1 +0088FF/get.js 5 5 1 +0088FF/turkoz.me 5 5 1 +00Craft/GrandTheftCraft 1 1 1 +00Rez/c2 1 1 1 +00StevenG/NSDictionary-ImageMetadata 4 4 1 +00StevenG/NSString-Japanese 6 6 1 +00StevenG/UITextViewExtras 4 4 1 +00benallen/TrashSmash 1 1 1 +00buggy00/SwiftOpenGL 2 2 1 +00conan00/JLex 1 1 1 +00erik2/OperatingSystemsHomework 1 1 1 +00krishna/gdelt_download 1 1 1 +00nanhai/Lcc 1 1 1 +01000101/DataRaptor 1 1 1 +01000101/insteon-75790-recorder 1 1 1 +01000101/onedrive-linux-client 2 2 1 +0101/pipetools 2 2 1 +01010101/GML4U 1 1 1 +0105time/AlloyRenderingEngine 1 1 1 +0109yuma/camera-app-swift 1 1 1 +010blue/oldcms 2 2 1 +0110/WookieController 1 1 1 +0110/wifikeyboard 1 1 1 +01271/OpenEndgame 1 1 1 +013/Soundcloud-Downloader 12 12 1 +01AutoMonkey/open.gl-tutorials-to-pyglet 2 2 1 + diff --git a/regression-test/data/variant_p2/sql/repositoriesWithTheWorstStagnation_order.out b/regression-test/data/variant_p2/sql/repositoriesWithTheWorstStagnation_order.out new file mode 100644 index 0000000000..9dce31a0ab --- /dev/null +++ b/regression-test/data/variant_p2/sql/repositoriesWithTheWorstStagnation_order.out @@ -0,0 +1,53 @@ +-- This file is automatically 
generated. You should know what you did if you want to edit this +-- !repositoriesWithTheWorstStagnation_order -- +0-1-0/lightblue-0.4 0 3 0 +0-14N/NDroid 0 2 0 +00-Evan/PD-classes 0 2 0 +00-Evan/shattered-pixel-dungeon 0 22 0 +00/wikihouse-controls 0 2 0 +002301/SequenceFrames 0 2 0 +00450681/AndroidANCSNotification 0 2 0 +007lva/gulp-jade-sass-6to5 0 3 0 +0088FF/get.js 0 5 0 +0088FF/turkoz.me 0 5 0 +00StevenG/NSDictionary-ImageMetadata 0 4 0 +00StevenG/NSString-Japanese 0 6 0 +00StevenG/UITextViewExtras 0 4 0 +00buggy00/SwiftOpenGL 0 2 0 +01000101/onedrive-linux-client 0 2 0 +0101/pipetools 0 2 0 +010blue/oldcms 0 2 0 +013/Soundcloud-Downloader 0 12 0 +01AutoMonkey/open.gl-tutorials-to-pyglet 0 2 0 +01org/DualScreen.Net 0 2 0 +01org/KVMGT-kernel 0 7 0 +01org/KVMGT-qemu 0 3 0 +01org/KVMGT-seabios 0 2 0 +01org/XenGT-Preview-kernel 0 9 0 +01org/XenGT-Preview-qemu 0 4 0 +01org/XenGT-Preview-xen 0 4 0 +01org/appframework 0 99 0 +01org/clloader 0 2 0 +01org/cordova-google-play-games-services 0 2 0 +01org/dleyna-control 0 3 0 +01org/dleyna-core 0 2 0 +01org/dleyna-server 0 2 0 +01org/dpdk-ovs 0 21 0 +01org/fiovisualizer 0 4 0 +01org/hpc-speedometer 0 3 0 +01org/idlf 0 2 0 +01org/ioprof 0 8 0 +01org/jWebAudio 0 6 0 +01org/liblwm2m 0 3 0 +01org/libxcam 0 5 0 +01org/libyami 0 5 0 +01org/luv-yocto 0 2 0 +01org/mic 0 2 0 +01org/msr-tools 0 2 0 +01org/numatop 0 3 0 +01org/obs-service-git-buildpackage 0 2 0 +01org/ozone-wayland 0 11 0 +01org/parameter-framework 0 2 0 +01org/parameter-framework-plugins-alsa 0 2 0 +01org/parameter-framework-plugins-filesystem 0 2 0 + diff --git a/regression-test/data/variant_p2/sql/repositoryAffinityList2.out b/regression-test/data/variant_p2/sql/repositoryAffinityList2.out new file mode 100644 index 0000000000..8087663833 --- /dev/null +++ b/regression-test/data/variant_p2/sql/repositoryAffinityList2.out @@ -0,0 +1,53 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !repositoryAffinityList2 -- +0xPIT/encoder 10 100.4 +0xmalloc/c-log 10 100.4 +0xsauby/yasuo 10 100.4 +1000Memories/TMQuiltView 10 100.4 +1001Pharmacies/dictionary 10 100.4 +10gen-labs/edda 10 100.4 +18F/hub 10 100.4 +1c7/Youtube-Auto-Subtitle-Download 10 100.4 +200Creative/spree_bootstrap_frontend 10 100.4 +201-created/ember-cli-headroom 10 100.4 +222464/AILib 10 100.4 +360works/fmangular 10 100.4 +3logic/apollo-cassandra 10 100.4 +3rdpartyeve/phealng 10 100.4 +4k3R/material-color-picker 10 100.4 +5-say/laravel-4.1-quick-start-cn 10 100.4 +504ensicsLabs/DAMM 10 100.4 +5HT/n2o.hs 10 100.4 +626Pilot/RaspberryPi-NeoPixel-WS2812 10 100.4 +6pac/SlickGrid 10 100.4 +6to5/6to5.github.io 10 100.4 +AArnott/PCLCrypto 10 100.4 +ADmad/cakephp-jwt-auth 10 100.4 +AF83/fnf-detect 10 100.4 +ARM-software/arm-trusted-firmware 10 100.4 +ARMmbed/yotta 10 100.4 +AWCN/AndroidDevWeekly 10 100.4 +ActiveState/appdirs 10 100.4 +AdamBien/afterburner.fx 10 100.4 +AdamBrodzinski/meteor-mobile-boilerplate 10 100.4 +Aegisub/Aegisub 10 100.4 +AlanChatham/UnoJoy 10 100.4 +AlbertGrobas/PolygonImageView 10 100.4 +AlexanderGrom/Qevix 10 100.4 +Alkalinee/Hurricane 10 100.4 +AlloyTeam/AlloyStick 10 100.4 +AltBeacon/spec 10 100.4 +AltspaceVR/MacMoveToRift 10 100.4 +AlxMedia/anew 10 100.4 +AminRahimi/angular-bootstrap-persian-datepicker 10 100.4 +AmpersandJS/ampersandjs.com 10 100.4 +Anahkiasen/polyglot 10 100.4 +AndreasBriese/ipLocator 10 100.4 +AndrewDryga/vagrant-box-osx-mavericks 10 100.4 +AndrewSchenk/App-Scanner 10 100.4 +Anizoptera/AzaThread 10 100.4 +Anomen/vagrant-selenium 10 100.4 +Anonyfox/meteor-scrape 10 100.4 +AnthonyDiGirolamo/todotxt-machine 10 100.4 +Antrikshy/Quibbler 10 100.4 + diff --git a/regression-test/data/variant_p2/sql/sql01.out b/regression-test/data/variant_p2/sql/sql01.out new file mode 100644 index 0000000000..af0b2b2edd --- /dev/null +++ b/regression-test/data/variant_p2/sql/sql01.out @@ -0,0 +1,3 @@ +-- This file is 
automatically generated. You should know what you did if you want to edit this +-- !sql01 -- + diff --git a/regression-test/data/variant_p2/sql/sql02.out b/regression-test/data/variant_p2/sql/sql02.out new file mode 100644 index 0000000000..8cbf5e8227 --- /dev/null +++ b/regression-test/data/variant_p2/sql/sql02.out @@ -0,0 +1,4 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !sql02 -- +21881363 + diff --git a/regression-test/data/variant_p2/sql/sql03.out b/regression-test/data/variant_p2/sql/sql03.out new file mode 100644 index 0000000000..d033b82757 --- /dev/null +++ b/regression-test/data/variant_p2/sql/sql03.out @@ -0,0 +1,4 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !sql03 -- +21881363 + diff --git a/regression-test/data/variant_p2/sql/sql04.out b/regression-test/data/variant_p2/sql/sql04.out new file mode 100644 index 0000000000..7af02ebd4f --- /dev/null +++ b/regression-test/data/variant_p2/sql/sql04.out @@ -0,0 +1,13 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !sql04 -- +27 {"issue":{"url":"https://api.github.com/repos/rubinius/rubinius/issues/3264","body":"Porting over https://github.com/rubyspec/rubyspec/issues/286 so we can keep track of it here:\\r\\n\\r\\n@jc00ke Can you, if you can still edit the original issue, copy-paste the Markdown into this issue (and remove this note)? 
I can't edit the issue so I can't copy over the tasks sadly.","events_url":"https://api.github.com/repos/rubinius/rubinius/issues/3264/events","labels_url":"https://api.github.com/repos/rubinius/rubinius/issues/3264/labels{/name}","comments_url":"https://api.github.com/repos/rubinius/rubinius/issues/3264/comments","title":"Need Ruby 2.2 specs","updated_at":"2015-01-01T13:42:58Z","html_url":"https://github.com/rubinius/rubinius/issues/3264","created_at":"2015-01-01T13:42:58Z","locked":0,"state":"open","user":{"starred_url":"https://api.github.com/users/YorickPeterse/starred{/owner}{/repo}","url":"https://api.github.com/users/YorickPeterse","repos_url":"https://api.github.com/users/YorickPeterse/repos","events_url":"https://api.github.com/users/YorickPeterse/events{/privacy}","login":"YorickPeterse","avatar_url":"https://avatars.githubusercontent.com/u/86065?v=3","gravatar_id":"","site_admin":0,"html_url":"https://github.com/YorickPeterse","received_events_url":"https://api.github.com/users/YorickPeterse/received_events","followers_url":"https://api.github.com/users/YorickPeterse/followers","following_url":"https://api.github.com/users/YorickPeterse/following{/other_user}","gists_url":"https://api.github.com/users/YorickPeterse/gists{/gist_id}","type":"User","subscriptions_url":"https://api.github.com/users/YorickPeterse/subscriptions","organizations_url":"https://api.github.com/users/YorickPeterse/orgs","id":86065},"id":53219928,"number":3264,"comments":0}} +27 {"issue":{"url":"https://api.github.com/repos/rubinius/rubinius/issues/3255","body":"\\r\\n/home/jzakiya/.rvm/log/1419522856_rbx-2.4.1/rake.log\\r\\nhttps://gist.github.com/jzakiya/bca4c6fd7e79992d7032","events_url":"https://api.github.com/repos/rubinius/rubinius/issues/3255/events","labels_url":"https://api.github.com/repos/rubinius/rubinius/issues/3255/labels{/name}","comments_url":"https://api.github.com/repos/rubinius/rubinius/issues/3255/comments","title":"rbx 2.4.1 upgrade 
errors","updated_at":"2015-01-01T13:48:53Z","html_url":"https://github.com/rubinius/rubinius/issues/3255","created_at":"2014-12-25T18:42:17Z","locked":0,"state":"open","user":{"starred_url":"https://api.github.com/users/jzakiya/starred{/owner}{/repo}","url":"https://api.github.com/users/jzakiya","repos_url":"https://api.github.com/users/jzakiya/repos","events_url":"https://api.github.com/users/jzakiya/events{/privacy}","login":"jzakiya","avatar_url":"https://avatars.githubusercontent.com/u/69856?v=3","gravatar_id":"","site_admin":0,"html_url":"https://github.com/jzakiya","received_events_url":"https://api.github.com/users/jzakiya/received_events","followers_url":"https://api.github.com/users/jzakiya/followers","following_url":"https://api.github.com/users/jzakiya/following{/other_user}","gists_url":"https://api.github.com/users/jzakiya/gists{/gist_id}","type":"User","subscriptions_url":"https://api.github.com/users/jzakiya/subscriptions","organizations_url":"https://api.github.com/users/jzakiya/orgs","id":69856},"id":52869897,"number":3255,"comments":2}} +27 {"issue":{"url":"https://api.github.com/repos/rubinius/rubinius/issues/3251","body":"For more details, please see the failed jobs of this [build](https://travis-ci.org/altkatz/jieba_rb/builds/44839361), It's an extension for a C++ library, works well on MRI (both on Linux and Mac OS X(10.10.1)) , and also works on Rubinius (tested 2.2.10-2.4.0 on my Mac OS X), but got the above coredump on Linux.","events_url":"https://api.github.com/repos/rubinius/rubinius/issues/3251/events","labels_url":"https://api.github.com/repos/rubinius/rubinius/issues/3251/labels{/name}","comments_url":"https://api.github.com/repos/rubinius/rubinius/issues/3251/comments","title":"\\"Invalid handle usage detected!\\" and core dumped on Linux, but not on Mac OS 
X.","updated_at":"2015-01-01T13:52:46Z","html_url":"https://github.com/rubinius/rubinius/issues/3251","created_at":"2014-12-23T01:41:28Z","locked":0,"state":"open","user":{"starred_url":"https://api.github.com/users/altkatz/starred{/owner}{/repo}","url":"https://api.github.com/users/altkatz","repos_url":"https://api.github.com/users/altkatz/repos","events_url":"https://api.github.com/users/altkatz/events{/privacy}","login":"altkatz","avatar_url":"https://avatars.githubusercontent.com/u/4903871?v=3","gravatar_id":"","site_admin":0,"html_url":"https://github.com/altkatz","received_events_url":"https://api.github.com/users/altkatz/received_events","followers_url":"https://api.github.com/users/altkatz/followers","following_url":"https://api.github.com/users/altkatz/following{/other_user}","gists_url":"https://api.github.com/users/altkatz/gists{/gist_id}","type":"User","subscriptions_url":"https://api.github.com/users/altkatz/subscriptions","organizations_url":"https://api.github.com/users/altkatz/orgs","id":4903871},"id":52706891,"number":3251,"comments":1}} +27 {"issue":{"url":"https://api.github.com/repos/rubinius/rubinius/issues/3236","body":"`String#ascii_only?` has a cache, but I/O calls that modify the string do not invalidate this cache. Here is the simplest test case:\\r\\n\\r\\n```\\r\\na, b = IO.pipe\\r\\na.binmode\\r\\nb.binmode\\r\\nb.write(\\"\\\\xE2\\\\x9C\\\\x93\\")\\r\\nb.close\\r\\n\\r\\nbuf = \\"\\".force_encoding(\\"binary\\")\\r\\np buf.ascii_only? # should be true\\r\\na.read(1, buf)\\r\\np buf.ascii_only? # should be false\\r\\n```\\r\\n\\r\\nRubinius 2.2.10 returns true, true. MRI returns true, false.\\r\\n\\r\\nUnfortunately I have not been able to get Rubinius 2.4.1 working, so I couldn't test on that version.\\r\\n\\r\\nThis seemingly simple bug can cause all sorts of encoding problems, for example https://github.com/phusion/passenger/issues/1169. 
In https://github.com/phusion/passenger/issues/1169#issuecomment-66618120, I described how a change in Passenger accidentally triggered the bug.","events_url":"https://api.github.com/repos/rubinius/rubinius/issues/3236/events","labels":[{"url":"https://api.github.com/repos/rubinius/rubinius/labels/encoding","name":"encoding","color":"02d7e1"},{"url":"https://api.github.com/repos/rubinius/rubinius/labels/performance","name":"performance","color":"ff0099"}],"labels_url":"https://api.github.com/repos/rubinius/rubinius/issues/3236/labels{/name}","comments_url":"https://api.github.com/repos/rubinius/rubinius/issues/3236/comments","title":"I/O calls do not invalidate `String#ascii_only?` cache","updated_at":"2015-01-01T13:55:20Z","html_url":"https://github.com/rubinius/rubinius/issues/3236","created_at":"2014-12-11T13:28:20Z","locked":0,"state":"open","user":{"starred_url":"https://api.github.com/users/FooBarWidget/starred{/owner}{/repo}","url":"https://api.github.com/users/FooBarWidget","repos_url":"https://api.github.com/users/FooBarWidget/repos","events_url":"https://api.github.com/users/FooBarWidget/events{/privacy}","login":"FooBarWidget","avatar_url":"https://avatars.githubusercontent.com/u/819?v=3","gravatar_id":"","site_admin":0,"html_url":"https://github.com/FooBarWidget","received_events_url":"https://api.github.com/users/FooBarWidget/received_events","followers_url":"https://api.github.com/users/FooBarWidget/followers","following_url":"https://api.github.com/users/FooBarWidget/following{/other_user}","gists_url":"https://api.github.com/users/FooBarWidget/gists{/gist_id}","type":"User","subscriptions_url":"https://api.github.com/users/FooBarWidget/subscriptions","organizations_url":"https://api.github.com/users/FooBarWidget/orgs","id":819},"id":51686342,"number":3236,"comments":5}} +27 {"issue":{"url":"https://api.github.com/repos/rubinius/rubinius/issues/3225","body":"Can't install rbx neither via rvm nor from sources. 
I tried to install such rbx versions as 2.2.7, 2.2.9 and 2.2.10 and every time got the same error:\\r\\n\\r\\n```\\r\\n/home/vagrant/rubinius/rubinius/staging/bin/rbx ./extconf.rbc\\r\\n\\r\\n---------------------------------------------\\r\\nCRASH: A fatal error has occurred.\\r\\n\\r\\nBacktrace:\\r\\n/home/vagrant/rubinius/rubinius/staging/bin/rbx[0x828d072]\\r\\n[0xb77a3400]\\r\\n/home/vagrant/rubinius/rubinius/staging/bin/rbx(ffi_closure_alloc+0xe4a)[0x83d3daa]\\r\\n/home/vagrant/rubinius/rubinius/staging/bin/rbx(_ZN8rubinius7FFIData6createEPNS_5StateEPNS_14NativeFunctionEiPNS_10FFIArgInfoES6_+0x5f)[0x82f211f]\\r\\n/home/vagrant/rubinius/rubinius/staging/bin/rbx(_ZN8rubinius14NativeFunction4prepEPNS_5StateEiPNS_10FFIArgInfoES4_+0xbb)[0x82f264b]\\r\\n/home/vagrant/rubinius/rubinius/staging/bin/rbx(_ZN8rubinius14NativeFunction8generateEPNS_5StateEPNS_7PointerEPNS_6SymbolEPNS_5ArrayEPNS_6ObjectE+0x11d)[0x82f27ed]\\r\\n/home/vagrant/rubinius/rubinius/staging/bin/rbx(_ZN8rubinius10Primitives23nativefunction_generateEPNS_5StateEPNS_9CallFrameEPNS_10ExecutableEPNS_6ModuleERNS_9ArgumentsE+0xd7)[0x8257ba7]\\r\\n/home/vagrant/rubinius/rubinius/staging/bin/rbx(_ZN8rubinius12CompiledCode16default_executorEPNS_5StateEPNS_9CallFrameEPNS_10ExecutableEPNS_6ModuleERNS_9ArgumentsE+0xb8)[0x82c5f08]\\r\\n/home/vagrant/rubinius/rubinius/staging/bin/rbx(_ZN8rubinius8CallSite11empty_cacheEPNS_5StateEPS0_PNS_9CallFrameERNS_9ArgumentsE+0x14b)[0x82c0c2b]\\r\\n/home/vagrant/rubinius/rubinius/staging/bin/rbx(_ZN8rubinius11MachineCode11interpreterEPNS_5StateEPS0_PNS_20InterpreterCallFrameE+0x18c4)[0x81e5354]\\r\\n/home/vagrant/rubinius/rubinius/staging/bin/rbx(_ZN8rubinius11MachineCode19execute_specializedINS_14FixedArgumentsEEEPNS_6ObjectEPNS_5StateEPNS_9CallFrameEPNS_10ExecutableEPNS_6ModuleERNS_9ArgumentsE+0x357)[0x822ac87]\\r\\n/home/vagrant/rubinius/rubinius/staging/bin/rbx(_ZN8rubinius12CompiledCode16default_executorEPNS_5StateEPNS_9CallFrameEPNS_10ExecutableEPNS_6ModuleERNS_9Argumen
tsE+0xb8)[0x82c5f08]\\r\\n/home/vagrant/rubinius/rubinius/staging/bin/rbx(_ZN8rubinius8CallSite19empty_cache_privateEPNS_5StateEPS0_PNS_9CallFrameERNS_9ArgumentsE+0x14b)[0x82c0a3b]\\r\\n/home/vagrant/rubinius/rubinius/staging/bin/rbx(_ZN8rubinius11MachineCode11interpreterEPNS_5StateEPS0_PNS_20InterpreterCallFrameE+0x18c4)[0x81e5354]\\r\\n/home/vagrant/rubinius/rubinius/staging/bin/rbx(_ZN8rubinius16BlockEnvironment19execute_interpreterEPNS_5StateEPNS_9CallFrameEPS0_RNS_9ArgumentsERNS_15BlockInvocationE+0x20a)[0x82bcb7a]\\r\\n/home/vagrant/rubinius/rubinius/staging/bin/rbx(_ZN8rubinius16BlockEnvironment6invokeEPNS_5StateEPNS_9CallFrameEPS0_RNS_9ArgumentsERNS_15BlockInvocationE+0x81)[0x82bd361]\\r\\n/home/vagrant/rubinius/rubinius/staging/bin/rbx(_ZN8rubinius16BlockEnvironment10call_underEPNS_5StateEPNS_9CallFrameEPNS_10ExecutableEPNS_6ModuleERNS_9ArgumentsE+0xd4)[0x82bd824]\\r\\n/home/vagrant/rubinius/rubinius/staging/bin/rbx(_ZN8rubinius10Primitives16block_call_underEPNS_5StateEPNS_9CallFrameEPNS_10ExecutableEPNS_6ModuleERNS_9ArgumentsE+0x92)[0x826ead2]\\r\\n/home/vagrant/rubinius/rubinius/staging/bin/rbx(_ZN8rubinius8CallSite11empty_cacheEPNS_5StateEPS0_PNS_9CallFrameERNS_9ArgumentsE+0x14b)[0x82c0c2b]\\r\\n/home/vagrant/rubinius/rubinius/staging/bin/rbx(_ZN8rubinius11MachineCode11interpreterEPNS_5StateEPS0_PNS_20InterpreterCallFrameE+0x18c4)[0x81e5354]\\r\\n/home/vagrant/rubinius/rubinius/staging/bin/rbx(_ZN8rubinius16BlockEnvironment19execute_interpreterEPNS_5StateEPNS_9CallFrameEPS0_RNS_9ArgumentsERNS_15BlockInvocationE+0x20a)[0x82bcb7a]\\r\\n/home/vagrant/rubinius/rubinius/staging/bin/rbx(_ZN8rubinius16BlockEnvironment6invokeEPNS_5StateEPNS_9CallFrameEPS0_RNS_9ArgumentsERNS_15BlockInvocationE+0x81)[0x82bd361]\\r\\n/home/vagrant/rubinius/rubinius/staging/bin/rbx(_ZN8rubinius16BlockEnvironment10call_underEPNS_5StateEPNS_9CallFrameEPNS_10ExecutableEPNS_6ModuleERNS_9ArgumentsE+0xd4)[0x82bd824]\\r\\n/home/vagrant/rubinius/rubinius/staging/bin/rbx(_ZN8rubinius10Primit
ives16block_call_underEPNS_5StateEPNS_9CallFrameEPNS_10ExecutableEPNS_6ModuleERNS_9ArgumentsE+0x92)[0x826ead2]\\r\\n/home/vagrant/rubinius/rubinius/staging/bin/rbx(_ZN8rubinius8CallSite11empty_cacheEPNS_5StateEPS0_PNS_9CallFrameERNS_9ArgumentsE+0x14b)[0x82c0c2b]\\r\\n/home/vagrant/rubinius/rubinius/staging/bin/rbx(_ZN8rubinius11MachineCode11interpreterEPNS_5StateEPS0_PNS_20InterpreterCallFrameE+0x18c4)[0x81e5354]\\r\\n/home/vagrant/rubinius/rubinius/staging/bin/rbx(_ZN8rubinius16BlockEnvironment19execute_interpreterEPNS_5StateEPNS_9CallFrameEPS0_RNS_9ArgumentsERNS_15BlockInvocationE+0x20a)[0x82bcb7a]\\r\\n/home/vagrant/rubinius/rubinius/staging/bin/rbx(_ZN8rubinius16BlockEnvironment6invokeEPNS_5StateEPNS_9CallFrameEPS0_RNS_9ArgumentsERNS_15BlockInvocationE+0x81)[0x82bd361]\\r\\n/home/vagrant/rubinius/rubinius/staging/bin/rbx(_ZN8rubinius16BlockEnvironment10call_underEPNS_5StateEPNS_9CallFrameEPNS_10ExecutableEPNS_6ModuleERNS_9ArgumentsE+0xd4)[0x82bd824]\\r\\n/home/vagrant/rubinius/rubinius/staging/bin/rbx(_ZN8rubinius10Primitives16block_call_underEPNS_5StateEPNS_9CallFrameEPNS_10ExecutableEPNS_6ModuleERNS_9ArgumentsE+0x92)[0x826ead2]\\r\\n/home/vagrant/rubinius/rubinius/staging/bin/rbx(_ZN8rubinius8CallSite11empty_cacheEPNS_5StateEPS0_PNS_9CallFrameERNS_9ArgumentsE+0x14b)[0x82c0c2b]\\r\\n/home/vagrant/rubinius/rubinius/staging/bin/rbx(_ZN8rubinius11MachineCode11interpreterEPNS_5StateEPS0_PNS_20InterpreterCallFrameE+0x18c4)[0x81e5354]\\r\\n/home/vagrant/rubinius/rubinius/staging/bin/rbx(_ZN8rubinius11MachineCode19execute_specializedINS_11NoArgumentsEEEPNS_6ObjectEPNS_5StateEPNS_9CallFrameEPNS_10ExecutableEPNS_6ModuleERNS_9ArgumentsE+0x21f)[0x822c5df]\\r\\n/home/vagrant/rubinius/rubinius/staging/bin/rbx(_ZN8rubinius12CompiledCode16default_executorEPNS_5StateEPNS_9CallFrameEPNS_10ExecutableEPNS_6ModuleERNS_9ArgumentsE+0xb8)[0x82c5f08]\\r\\n/home/vagrant/rubinius/rubinius/staging/bin/rbx(_ZN8rubinius12CompiledFile7executeEPNS_5StateE+0x1f8)[0x81bb788]\\r\\n/home/vagrant
/rubinius/rubinius/staging/bin/rbx(_ZN8rubinius11Environment8run_fileESs+0x1c0)[0x81c1840]\\r\\n/home/vagrant/rubinius/rubinius/staging/bin/rbx(_ZN8rubinius11Environment14load_directoryESs+0x25a)[0x81c288a]\\r\\n/home/vagrant/rubinius/rubinius/staging/bin/rbx(_ZN8rubinius11Environment11load_kernelESs+0x35d)[0x81c30ad]\\r\\n/home/vagrant/rubinius/rubinius/staging/bin/rbx(_ZN8rubinius11Environment19run_from_filesystemEv+0x21d)[0x81c379d]\\r\\n/home/vagrant/rubinius/rubinius/staging/bin/rbx(main+0xcc)[0x81807dc]\\r\\n/lib/i386-linux-gnu/libc.so.6(__libc_start_main+0xf3)[0xb74844d3]\\r\\n/home/vagrant/rubinius/rubinius/staging/bin/rbx[0x818e019]\\r\\n\\r\\n\\r\\nWrote full error report to: /home/vagrant/.rbx/rubinius_last_error_7042\\r\\nRun 'rbx report' to submit this crash report!\\r\\nrake aborted!\\r\\nCommand failed with status (): [/home/vagrant/rubinius/rubinius/staging/bi...]\\r\\n/home/vagrant/rubinius/rubinius/rakelib/gems.rake:5:in `bootstrap_rubinius'\\r\\n/home/vagrant/rubinius/rubinius/rakelib/gems.rake:50:in `block (3 levels) in '\\r\\n/home/vagrant/rubinius/rubinius/rakelib/gems.rake:47:in `chdir'\\r\\n/home/vagrant/rubinius/rubinius/rakelib/gems.rake:47:in `block (2 levels) in '\\r\\n/home/vagrant/.rvm/gems/ruby-2.1.5-dev/bin/ruby_executable_hooks:15:in `eval'\\r\\n/home/vagrant/.rvm/gems/ruby-2.1.5-dev/bin/ruby_executable_hooks:15:in `
'\\r\\nTasks: TOP => install => build:build => gems:melbourne\\r\\n(See full trace by running task with --trace)\\r\\n``` \\r\\n\\r\\nRubinius Crash Report: https://gist.github.com/undr/f47cce8d95b5901eb5d8\\r\\n\\r\\n```shell\\r\\n> rvm -v\\r\\nrvm 1.26.3 (latest) by Wayne E. Seguin , Michal Papis [https://rvm.io/]\\r\\n> uname -a\\r\\nLinux precise32 3.2.0-23-generic-pae #36-Ubuntu SMP Tue Apr 10 22:19:09 UTC 2012 i686 i686 i386 GNU/Linux\\r\\n> lsb_release -a\\r\\nNo LSB modules are available.\\r\\nDistributor ID:\\tUbuntu\\r\\nDescription:\\tUbuntu 12.04 LTS\\r\\nRelease:\\t12.04\\r\\nCodename:\\tprecise\\r\\n```\\r\\n\\r\\n","events_url":"https://api.github.com/repos/rubinius/rubinius/issues/3225/events","labels_url":"https://api.github.com/repos/rubinius/rubinius/issues/3225/labels{/name}","comments_url":"https://api.github.com/repos/rubinius/rubinius/issues/3225/comments","title":"Can't install rbx neither via rvm nor from sources","updated_at":"2015-01-01T13:58:06Z","html_url":"https://github.com/rubinius/rubinius/issues/3225","created_at":"2014-11-30T15:01:08Z","locked":0,"state":"open","user":{"starred_url":"https://api.github.com/users/undr/starred{/owner}{/repo}","url":"https://api.github.com/users/undr","repos_url":"https://api.github.com/users/undr/repos","events_url":"https://api.github.com/users/undr/events{/privacy}","login":"undr","avatar_url":"https://avatars.githubusercontent.com/u/126763?v=3","gravatar_id":"","site_admin":0,"html_url":"https://github.com/undr","received_events_url":"https://api.github.com/users/undr/received_events","followers_url":"https://api.github.com/users/undr/followers","following_url":"https://api.github.com/users/undr/following{/other_user}","gists_url":"https://api.github.com/users/undr/gists{/gist_id}","type":"User","subscriptions_url":"https://api.github.com/users/undr/subscriptions","organizations_url":"https://api.github.com/users/undr/orgs","id":126763},"id":50460061,"number":3225,"comments":7}} +27 
{"issue":{"url":"https://api.github.com/repos/rubinius/rubinius/issues/3221","body":"Let's list them out here and then work on a dashboard. We should incorporate #2006 and #2121 in the dashboard.\\r\\n\\r\\n- [ ] sidekiq\\r\\n- [ ] celluloid\\r\\n- [ ] suckerpunch\\r\\n- [ ] Rails\\r\\n - [ ] ActiveSupport\\r\\n - [ ] ActionMailer\\r\\n - [ ] ActionPack\\r\\n - [ ] ActionView\\r\\n - [ ] ActiveJob\\r\\n - [ ] ActiveModel\\r\\n - [ ] ActiveRecord","events_url":"https://api.github.com/repos/rubinius/rubinius/issues/3221/events","labels_url":"https://api.github.com/repos/rubinius/rubinius/issues/3221/labels{/name}","comments_url":"https://api.github.com/repos/rubinius/rubinius/issues/3221/comments","title":"Ensure the tests for these projects run on Travis","updated_at":"2015-01-01T14:02:33Z","html_url":"https://github.com/rubinius/rubinius/issues/3221","created_at":"2014-11-20T18:49:51Z","locked":0,"state":"open","user":{"starred_url":"https://api.github.com/users/jc00ke/starred{/owner}{/repo}","url":"https://api.github.com/users/jc00ke","repos_url":"https://api.github.com/users/jc00ke/repos","events_url":"https://api.github.com/users/jc00ke/events{/privacy}","login":"jc00ke","avatar_url":"https://avatars.githubusercontent.com/u/18191?v=3","gravatar_id":"","site_admin":0,"html_url":"https://github.com/jc00ke","received_events_url":"https://api.github.com/users/jc00ke/received_events","followers_url":"https://api.github.com/users/jc00ke/followers","following_url":"https://api.github.com/users/jc00ke/following{/other_user}","gists_url":"https://api.github.com/users/jc00ke/gists{/gist_id}","type":"User","subscriptions_url":"https://api.github.com/users/jc00ke/subscriptions","organizations_url":"https://api.github.com/users/jc00ke/orgs","id":18191},"id":49602877,"number":3221,"comments":1}} +27 {"issue":{"url":"https://api.github.com/repos/rubinius/rubinius/issues/3217","body":"The other day, this app (https://github.com/ahwatts/mogilefs_s3_device), which is essentially 
a half-implemented WebDAV server that acts as an interface to S3 for our internal file-storage system, got in to a weird state and started throwing a lot of errors with this stack trace: https://gist.github.com/ahwatts/0b6c004d2c1943d4bccb . I suspect one or more of the database connections in our connection pool was broken somehow, because some requests were functioning while others were throwing this error.\\r\\n\\r\\nIt's entirely possible that I'm doing something wrong and unsupported here, but I'm not sure how to trace through the call down to the MySQL extension and then back up to Rubinius, and was wondering if y'all had any insight into this.\\r\\n\\r\\nRubinius version:\\r\\nrubinius 2.2.10 (2.1.0 bf61ae2e 2014-06-27 JI) [x86_64-linux-gnu]\\r\\n\\r\\nOS uname:\\r\\nLinux alt-storage1.internal.domain 2.6.32-431.20.3.el6.x86_64 #1 SMP Thu Jun 19 21:14:45 UTC 2014 x86_64 x86_64 x86_64 GNU/Linux\\r\\n\\r\\nStack trace:\\r\\nhttps://gist.github.com/ahwatts/0b6c004d2c1943d4bccb\\r\\n\\r\\nSource code:\\r\\nhttps://github.com/ahwatts/mogilefs_s3_device","events_url":"https://api.github.com/repos/rubinius/rubinius/issues/3217/events","labels_url":"https://api.github.com/repos/rubinius/rubinius/issues/3217/labels{/name}","comments_url":"https://api.github.com/repos/rubinius/rubinius/issues/3217/comments","title":"NoMethodError: undefined method `to_str' on false:FalseClass in mysql2 
gem","updated_at":"2015-01-01T14:06:46Z","html_url":"https://github.com/rubinius/rubinius/issues/3217","created_at":"2014-11-17T16:25:57Z","locked":0,"state":"open","user":{"starred_url":"https://api.github.com/users/ahwatts/starred{/owner}{/repo}","url":"https://api.github.com/users/ahwatts","repos_url":"https://api.github.com/users/ahwatts/repos","events_url":"https://api.github.com/users/ahwatts/events{/privacy}","login":"ahwatts","avatar_url":"https://avatars.githubusercontent.com/u/308758?v=3","gravatar_id":"","site_admin":0,"html_url":"https://github.com/ahwatts","received_events_url":"https://api.github.com/users/ahwatts/received_events","followers_url":"https://api.github.com/users/ahwatts/followers","following_url":"https://api.github.com/users/ahwatts/following{/other_user}","gists_url":"https://api.github.com/users/ahwatts/gists{/gist_id}","type":"User","subscriptions_url":"https://api.github.com/users/ahwatts/subscriptions","organizations_url":"https://api.github.com/users/ahwatts/orgs","id":308758},"id":49111805,"number":3217,"comments":1}} +27 {"issue":{"url":"https://api.github.com/repos/rubinius/rubinius/issues/3230","body":"Maybe sprinter generates illegal bytecode sequence.\\r\\n\\r\\nI got busy recently. No further digging... I'd like to fix this if time allows me... 
;)\\r\\n\\r\\nThis is the one-liner\\r\\n\\r\\n```\\r\\n./bin/mspec --repeat 100000 spec/ruby/core/string/modulo_spec.rb\\r\\n```\\r\\n\\r\\n```\\r\\nAssertion failed: (Val && \\"isa<> used on a null pointer\\"), function doit, file /usr/local/Cellar/llvm/3.5.0/include/llvm/Support/Casting.h, line 95.\\r\\nThe Rubinius process is aborting with signal: SIGABRT\\r\\n--- begin system info ---\\r\\nsysname: Darwin\\r\\nnodename: foobar\\r\\nrelease: 13.3.0\\r\\nversion: Darwin Kernel Version 13.3.0: Tue Jun 3 21:27:35 PDT 2014; root:xnu-2422.110.17~1/RELEASE_X86_64\\r\\nmachine: x86_64\\r\\n--- end system info ---\\r\\n--- begin system backtrace ---\\r\\n0 rbx 0x000000010905cb48 _ZN8rubiniusL12segv_handlerEi + 248\\r\\n1 libsystem_platform.dylib 0x00007fff8d3065aa _sigtramp + 26\\r\\n2 ??? 0x0000000000000000 0x0 + 0\\r\\n3 rbx 0x000000010997ed2b abort + 22\\r\\n4 rbx 0x000000010997ed15 abort + 0\\r\\n5 rbx 0x000000010912e8e7 _ZN4llvm9IRBuilderILb1ENS_14ConstantFolderEN8rubinius26IRBuilderInserterWithDebugEE10CreateICmpENS_7CmpInst9PredicateEPNS_5ValueES8_RKNS_5TwineE + 247\\r\\n6 rbx 0x000000010915da39 _ZN8rubinius8JITVisit24check_for_exception_thenEPN4llvm5ValueEPNS1_10BasicBlockEb + 217\\r\\n7 rbx 0x0000000109165a87 _ZN8rubinius8JITVisit19check_for_exceptionEPN4llvm5ValueEb + 119\\r\\n8 rbx 0x0000000109153192 _ZN8rubinius8JITVisit16visit_send_stackERmm + 1298\\r\\n9 rbx 0x00000001091487bd _ZN8rubinius17VisitInstructionsINS_8JITVisitEE8dispatchEi + 1917\\r\\n10 rbx 0x0000000109147fc3 _ZN8rubinius3jit6Walker4callERNS_14OpcodeIteratorE + 131\\r\\n11 rbx 0x0000000109147d3e _ZN8rubinius3jit17ControlFlowWalker3runINS0_6WalkerEEEvRT_ + 446\\r\\n12 rbx 0x0000000109145f7e _ZN8rubinius3jit7Builder13generate_bodyEv + 590\\r\\n13 rbx 0x00000001091676a0 _ZN8rubinius3jit8Compiler13compile_blockEPNS_17JITCompileRequestE + 624\\r\\n14 rbx 0x000000010917dbeb _ZN8rubinius9LLVMState7performEPNS_5StateE + 1083\\r\\n15 rbx 0x000000010917d78c _ZN8rubinius19jit_llvm_trampolineEPNS_5StateE 
+ 28\\r\\n16 rbx 0x00000001090ed97b _ZN8rubinius6Thread13in_new_threadEPv + 747\\r\\n17 libsystem_pthread.dylib 0x00007fff9744e899 _pthread_body + 138\\r\\n18 libsystem_pthread.dylib 0x00007fff9744e72a _pthread_struct_init + 0\\r\\n19 libsystem_pthread.dylib 0x00007fff97452fc9 thread_start + 13\\r\\n--- end system backtrace ---\\r\\n--- begin Ruby backtraces ---\\r\\n--- Thread 1 backtrace ---\\r\\nRubinius::Mirror.reflect in kernel/bootstrap/mirror.rb:12 (+0 jit)\\r\\nRubinius::Mirror::Array.reflect in kernel/bootstrap/array_mirror.rb:7 (+3 jit)\\r\\nArray#initialize_copy in kernel/common/array.rb:85 (+31 inline)\\r\\nKernel#initialize_dup in kernel/common/kernel.rb:358 (+0 jit)\\r\\nRubinius::Type.object_initialize_dup in kernel/common/type.rb:493 (+0 jit)\\r\\nKernel#dup in kernel/alpha.rb:207 (+22 inline)\\r\\n__block__ in kernel/common/enumerable.rb:328 (+0 jit)\\r\\nArray#each in kernel/bootstrap/array.rb:76 (+51 jit)\\r\\nEnumerable#all? in kernel/common/enumerable.rb:328 (+7 inline)\\r\\nContextState#protect in /Users/ryo-onodera/rubinius-central/mspec/lib/mspec/runner/context.rb:178 (+18 inline)\\r\\n__block__ in /Users/ryo-onodera/rubinius-central/mspec/lib/mspec/runner/context.rb:210 (+79 jit)\\r\\n__block__ in /Users/ryo-onodera/rubinius-central/mspec/lib/mspec/runner/mspec.rb:239 (+0 jit)\\r\\nInteger#times in kernel/common/integer.rb:196 (+31)\\r\\nMSpec.repeat in /Users/ryo-onodera/rubinius-central/mspec/lib/mspec/runner/mspec.rb:238 (+12)\\r\\n__block__ in /Users/ryo-onodera/rubinius-central/mspec/lib/mspec/runner/context.rb:200 (+7)\\r\\nArray#each in kernel/bootstrap/array.rb:76 (+51 jit)\\r\\nContextState#process in /Users/ryo-onodera/rubinius-central/mspec/lib/mspec/runner/context.rb:199 (+71)\\r\\nMSpec.describe in /Users/ryo-onodera/rubinius-central/mspec/lib/mspec/runner/mspec.rb:0 (+108)\\r\\nObject#describe in /Users/ryo-onodera/rubinius-central/mspec/lib/mspec/runner/object.rb:11 (+42)\\r\\nObject#__script__ in 
/Users/ryo-onodera/rubinius-central/spec/ruby/core/string/modulo_spec.rb:4 (+46)\\r\\nKernel.load in kernel/common/kernel.rb:497 (+58)\\r\\n__block__ in /Users/ryo-onodera/rubinius-central/mspec/lib/mspec/runner/mspec.rb:57 (+8)\\r\\nBasicObject#instance_eval in kernel/common/eval.rb:43 (+120)\\r\\nMSpec.protect in /Users/ryo-onodera/rubinius-central/mspec/lib/mspec/runner/mspec.rb:69 (+29)\\r\\n__block__ in /Users/ryo-onodera/rubinius-central/mspec/lib/mspec/runner/mspec.rb:57 (+68)\\r\\nArray#each in kernel/bootstrap/array.rb:76 (+51 jit)\\r\\nMSpec.files in /Users/ryo-onodera/rubinius-central/mspec/lib/mspec/runner/mspec.rb:51 (+42)\\r\\nMSpec.process in /Users/ryo-onodera/rubinius-central/mspec/lib/mspec/runner/mspec.rb:43 (+11)\\r\\nMSpecRun#run in /Users/ryo-onodera/rubinius-central/mspec/lib/mspec/commands/mspec-run.rb:94 (+35)\\r\\nMSpecScript.main in /Users/ryo-onodera/rubinius-central/mspec/lib/mspec/utils/script.rb:218 (+67)\\r\\nObject#__script__ in /Users/ryo-onodera/rubinius-central/mspec/bin/mspec-run:8 (+47)\\r\\nRubinius::CodeLoader#load_script in kernel/delta/code_loader.rb:66 (+52)\\r\\nRubinius::CodeLoader.load_script in kernel/delta/code_loader.rb:152 (+40)\\r\\nRubinius::Loader#script in kernel/loader.rb:645 (+214)\\r\\nRubinius::Loader#main in kernel/loader.rb:799 (+77)\\r\\n```","events_url":"https://api.github.com/repos/rubinius/rubinius/issues/3230/events","labels":[{"url":"https://api.github.com/repos/rubinius/rubinius/labels/crash","name":"crash","color":"ff0000"},{"url":"https://api.github.com/repos/rubinius/rubinius/labels/JIT","name":"JIT","color":"a6d18e"}],"labels_url":"https://api.github.com/repos/rubinius/rubinius/issues/3230/labels{/name}","comments_url":"https://api.github.com/repos/rubinius/rubinius/issues/3230/comments","title":"Some format pattern of String#% causes SEGV when to 
jit","updated_at":"2015-01-01T15:09:50Z","html_url":"https://github.com/rubinius/rubinius/issues/3230","created_at":"2014-12-05T12:09:44Z","locked":0,"state":"open","user":{"starred_url":"https://api.github.com/users/ryoqun/starred{/owner}{/repo}","url":"https://api.github.com/users/ryoqun","repos_url":"https://api.github.com/users/ryoqun/repos","events_url":"https://api.github.com/users/ryoqun/events{/privacy}","login":"ryoqun","avatar_url":"https://avatars.githubusercontent.com/u/117807?v=3","gravatar_id":"","site_admin":0,"html_url":"https://github.com/ryoqun","received_events_url":"https://api.github.com/users/ryoqun/received_events","followers_url":"https://api.github.com/users/ryoqun/followers","following_url":"https://api.github.com/users/ryoqun/following{/other_user}","gists_url":"https://api.github.com/users/ryoqun/gists{/gist_id}","type":"User","subscriptions_url":"https://api.github.com/users/ryoqun/subscriptions","organizations_url":"https://api.github.com/users/ryoqun/orgs","id":117807},"id":51092093,"number":3230,"comments":3}} +27 {"issue":{"url":"https://api.github.com/repos/rubinius/rubinius/issues/3230","body":"Maybe sprinter generates illegal bytecode sequence.\\r\\n\\r\\nI got busy recently. No further digging... I'd like to fix this if time allows me... 
;)\\r\\n\\r\\nThis is the one-liner\\r\\n\\r\\n```\\r\\n./bin/mspec --repeat 100000 spec/ruby/core/string/modulo_spec.rb\\r\\n```\\r\\n\\r\\n```\\r\\nAssertion failed: (Val && \\"isa<> used on a null pointer\\"), function doit, file /usr/local/Cellar/llvm/3.5.0/include/llvm/Support/Casting.h, line 95.\\r\\nThe Rubinius process is aborting with signal: SIGABRT\\r\\n--- begin system info ---\\r\\nsysname: Darwin\\r\\nnodename: foobar\\r\\nrelease: 13.3.0\\r\\nversion: Darwin Kernel Version 13.3.0: Tue Jun 3 21:27:35 PDT 2014; root:xnu-2422.110.17~1/RELEASE_X86_64\\r\\nmachine: x86_64\\r\\n--- end system info ---\\r\\n--- begin system backtrace ---\\r\\n0 rbx 0x000000010905cb48 _ZN8rubiniusL12segv_handlerEi + 248\\r\\n1 libsystem_platform.dylib 0x00007fff8d3065aa _sigtramp + 26\\r\\n2 ??? 0x0000000000000000 0x0 + 0\\r\\n3 rbx 0x000000010997ed2b abort + 22\\r\\n4 rbx 0x000000010997ed15 abort + 0\\r\\n5 rbx 0x000000010912e8e7 _ZN4llvm9IRBuilderILb1ENS_14ConstantFolderEN8rubinius26IRBuilderInserterWithDebugEE10CreateICmpENS_7CmpInst9PredicateEPNS_5ValueES8_RKNS_5TwineE + 247\\r\\n6 rbx 0x000000010915da39 _ZN8rubinius8JITVisit24check_for_exception_thenEPN4llvm5ValueEPNS1_10BasicBlockEb + 217\\r\\n7 rbx 0x0000000109165a87 _ZN8rubinius8JITVisit19check_for_exceptionEPN4llvm5ValueEb + 119\\r\\n8 rbx 0x0000000109153192 _ZN8rubinius8JITVisit16visit_send_stackERmm + 1298\\r\\n9 rbx 0x00000001091487bd _ZN8rubinius17VisitInstructionsINS_8JITVisitEE8dispatchEi + 1917\\r\\n10 rbx 0x0000000109147fc3 _ZN8rubinius3jit6Walker4callERNS_14OpcodeIteratorE + 131\\r\\n11 rbx 0x0000000109147d3e _ZN8rubinius3jit17ControlFlowWalker3runINS0_6WalkerEEEvRT_ + 446\\r\\n12 rbx 0x0000000109145f7e _ZN8rubinius3jit7Builder13generate_bodyEv + 590\\r\\n13 rbx 0x00000001091676a0 _ZN8rubinius3jit8Compiler13compile_blockEPNS_17JITCompileRequestE + 624\\r\\n14 rbx 0x000000010917dbeb _ZN8rubinius9LLVMState7performEPNS_5StateE + 1083\\r\\n15 rbx 0x000000010917d78c _ZN8rubinius19jit_llvm_trampolineEPNS_5StateE 
+ 28\\r\\n16 rbx 0x00000001090ed97b _ZN8rubinius6Thread13in_new_threadEPv + 747\\r\\n17 libsystem_pthread.dylib 0x00007fff9744e899 _pthread_body + 138\\r\\n18 libsystem_pthread.dylib 0x00007fff9744e72a _pthread_struct_init + 0\\r\\n19 libsystem_pthread.dylib 0x00007fff97452fc9 thread_start + 13\\r\\n--- end system backtrace ---\\r\\n--- begin Ruby backtraces ---\\r\\n--- Thread 1 backtrace ---\\r\\nRubinius::Mirror.reflect in kernel/bootstrap/mirror.rb:12 (+0 jit)\\r\\nRubinius::Mirror::Array.reflect in kernel/bootstrap/array_mirror.rb:7 (+3 jit)\\r\\nArray#initialize_copy in kernel/common/array.rb:85 (+31 inline)\\r\\nKernel#initialize_dup in kernel/common/kernel.rb:358 (+0 jit)\\r\\nRubinius::Type.object_initialize_dup in kernel/common/type.rb:493 (+0 jit)\\r\\nKernel#dup in kernel/alpha.rb:207 (+22 inline)\\r\\n__block__ in kernel/common/enumerable.rb:328 (+0 jit)\\r\\nArray#each in kernel/bootstrap/array.rb:76 (+51 jit)\\r\\nEnumerable#all? in kernel/common/enumerable.rb:328 (+7 inline)\\r\\nContextState#protect in /Users/ryo-onodera/rubinius-central/mspec/lib/mspec/runner/context.rb:178 (+18 inline)\\r\\n__block__ in /Users/ryo-onodera/rubinius-central/mspec/lib/mspec/runner/context.rb:210 (+79 jit)\\r\\n__block__ in /Users/ryo-onodera/rubinius-central/mspec/lib/mspec/runner/mspec.rb:239 (+0 jit)\\r\\nInteger#times in kernel/common/integer.rb:196 (+31)\\r\\nMSpec.repeat in /Users/ryo-onodera/rubinius-central/mspec/lib/mspec/runner/mspec.rb:238 (+12)\\r\\n__block__ in /Users/ryo-onodera/rubinius-central/mspec/lib/mspec/runner/context.rb:200 (+7)\\r\\nArray#each in kernel/bootstrap/array.rb:76 (+51 jit)\\r\\nContextState#process in /Users/ryo-onodera/rubinius-central/mspec/lib/mspec/runner/context.rb:199 (+71)\\r\\nMSpec.describe in /Users/ryo-onodera/rubinius-central/mspec/lib/mspec/runner/mspec.rb:0 (+108)\\r\\nObject#describe in /Users/ryo-onodera/rubinius-central/mspec/lib/mspec/runner/object.rb:11 (+42)\\r\\nObject#__script__ in 
/Users/ryo-onodera/rubinius-central/spec/ruby/core/string/modulo_spec.rb:4 (+46)\\r\\nKernel.load in kernel/common/kernel.rb:497 (+58)\\r\\n__block__ in /Users/ryo-onodera/rubinius-central/mspec/lib/mspec/runner/mspec.rb:57 (+8)\\r\\nBasicObject#instance_eval in kernel/common/eval.rb:43 (+120)\\r\\nMSpec.protect in /Users/ryo-onodera/rubinius-central/mspec/lib/mspec/runner/mspec.rb:69 (+29)\\r\\n__block__ in /Users/ryo-onodera/rubinius-central/mspec/lib/mspec/runner/mspec.rb:57 (+68)\\r\\nArray#each in kernel/bootstrap/array.rb:76 (+51 jit)\\r\\nMSpec.files in /Users/ryo-onodera/rubinius-central/mspec/lib/mspec/runner/mspec.rb:51 (+42)\\r\\nMSpec.process in /Users/ryo-onodera/rubinius-central/mspec/lib/mspec/runner/mspec.rb:43 (+11)\\r\\nMSpecRun#run in /Users/ryo-onodera/rubinius-central/mspec/lib/mspec/commands/mspec-run.rb:94 (+35)\\r\\nMSpecScript.main in /Users/ryo-onodera/rubinius-central/mspec/lib/mspec/utils/script.rb:218 (+67)\\r\\nObject#__script__ in /Users/ryo-onodera/rubinius-central/mspec/bin/mspec-run:8 (+47)\\r\\nRubinius::CodeLoader#load_script in kernel/delta/code_loader.rb:66 (+52)\\r\\nRubinius::CodeLoader.load_script in kernel/delta/code_loader.rb:152 (+40)\\r\\nRubinius::Loader#script in kernel/loader.rb:645 (+214)\\r\\nRubinius::Loader#main in kernel/loader.rb:799 (+77)\\r\\n```","events_url":"https://api.github.com/repos/rubinius/rubinius/issues/3230/events","labels":[{"url":"https://api.github.com/repos/rubinius/rubinius/labels/crash","name":"crash","color":"ff0000"},{"url":"https://api.github.com/repos/rubinius/rubinius/labels/JIT","name":"JIT","color":"a6d18e"},{"url":"https://api.github.com/repos/rubinius/rubinius/labels/Needs+feedback","name":"Needs feedback","color":"fef2c0"}],"labels_url":"https://api.github.com/repos/rubinius/rubinius/issues/3230/labels{/name}","comments_url":"https://api.github.com/repos/rubinius/rubinius/issues/3230/comments","title":"Some format pattern of String#% causes SEGV when to 
jit","updated_at":"2015-01-01T15:20:57Z","html_url":"https://github.com/rubinius/rubinius/issues/3230","created_at":"2014-12-05T12:09:44Z","locked":0,"state":"open","user":{"starred_url":"https://api.github.com/users/ryoqun/starred{/owner}{/repo}","url":"https://api.github.com/users/ryoqun","repos_url":"https://api.github.com/users/ryoqun/repos","events_url":"https://api.github.com/users/ryoqun/events{/privacy}","login":"ryoqun","avatar_url":"https://avatars.githubusercontent.com/u/117807?v=3","gravatar_id":"","site_admin":0,"html_url":"https://github.com/ryoqun","received_events_url":"https://api.github.com/users/ryoqun/received_events","followers_url":"https://api.github.com/users/ryoqun/followers","following_url":"https://api.github.com/users/ryoqun/following{/other_user}","gists_url":"https://api.github.com/users/ryoqun/gists{/gist_id}","type":"User","subscriptions_url":"https://api.github.com/users/ryoqun/subscriptions","organizations_url":"https://api.github.com/users/ryoqun/orgs","id":117807},"id":51092093,"number":3230,"comments":4}} +27 {"issue":{"url":"https://api.github.com/repos/rubinius/rubinius/issues/3265","body":"Porting from rubyspec/rubyspec#23\\r\\n\\r\\nhttp://gist.github.com/437039\\r\\n\\r\\n- [ ] Pull out shared specs\\r\\n- [ ] Review specs","events_url":"https://api.github.com/repos/rubinius/rubinius/issues/3265/events","labels":[{"url":"https://api.github.com/repos/rubinius/rubinius/labels/spec","name":"spec","color":"d4d4d4"}],"labels_url":"https://api.github.com/repos/rubinius/rubinius/issues/3265/labels{/name}","comments_url":"https://api.github.com/repos/rubinius/rubinius/issues/3265/comments","title":"Kernel.chomp 
specs","updated_at":"2015-01-01T18:03:50Z","html_url":"https://github.com/rubinius/rubinius/issues/3265","created_at":"2015-01-01T18:03:50Z","locked":0,"state":"open","user":{"starred_url":"https://api.github.com/users/jc00ke/starred{/owner}{/repo}","url":"https://api.github.com/users/jc00ke","repos_url":"https://api.github.com/users/jc00ke/repos","events_url":"https://api.github.com/users/jc00ke/events{/privacy}","login":"jc00ke","avatar_url":"https://avatars.githubusercontent.com/u/18191?v=3","gravatar_id":"","site_admin":0,"html_url":"https://github.com/jc00ke","received_events_url":"https://api.github.com/users/jc00ke/received_events","followers_url":"https://api.github.com/users/jc00ke/followers","following_url":"https://api.github.com/users/jc00ke/following{/other_user}","gists_url":"https://api.github.com/users/jc00ke/gists{/gist_id}","type":"User","subscriptions_url":"https://api.github.com/users/jc00ke/subscriptions","organizations_url":"https://api.github.com/users/jc00ke/orgs","id":18191},"id":53224957,"number":3265,"comments":0}} + diff --git a/regression-test/data/variant_p2/sql/sql05.out b/regression-test/data/variant_p2/sql/sql05.out new file mode 100644 index 0000000000..5e2167b9db --- /dev/null +++ b/regression-test/data/variant_p2/sql/sql05.out @@ -0,0 +1,4 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !sql05 -- +10776362 + diff --git a/regression-test/data/variant_p2/sql/sql06.out b/regression-test/data/variant_p2/sql/sql06.out new file mode 100644 index 0000000000..247a88c06e --- /dev/null +++ b/regression-test/data/variant_p2/sql/sql06.out @@ -0,0 +1,4 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !sql06 -- +1910661 + diff --git a/regression-test/data/variant_p2/sql/sql07.out b/regression-test/data/variant_p2/sql/sql07.out new file mode 100644 index 0000000000..841f7f09d9 --- /dev/null +++ b/regression-test/data/variant_p2/sql/sql07.out @@ -0,0 +1,16 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !sql07 -- +0 + +-- !sql07_2 -- +[{"sha":"dbd68d30ee1f7b60d404553fc1c6226ebb374c8e","author":{"email":"88de463b5797707cf3425f85a415c3d869db732b@gmail.com","name":"Soumith Chintala"},"message":"back to old structure, except lua files moved out","distinct":true,"url":"https://api.github.com/repos/soumith/fbcunn/commits/dbd68d30ee1f7b60d404553fc1c6226ebb374c8e"},{"sha":"5567f9f5a83d7fe3320b18e5b89405e8a5ca77e6","author":{"email":"88de463b5797707cf3425f85a415c3d869db732b@gmail.com","name":"Soumith Chintala"},"message":"...","distinct":true,"url":"https://api.github.com/repos/soumith/fbcunn/commits/5567f9f5a83d7fe3320b18e5b89405e8a5ca77e6"},{"sha":"58a83b277328eca811d3a37cf171b2fc4fcd87af","author":{"email":"88de463b5797707cf3425f85a415c3d869db732b@gmail.com","name":"Soumith Chintala"},"message":"...","distinct":true,"url":"https://api.github.com/repos/soumith/fbcunn/commits/58a83b277328eca811d3a37cf171b2fc4fcd87af"},{"sha":"fa6048ec9b9eeafd12cee5f81324f355e1f2a198","author":{"email":"88de463b5797707cf3425f85a415c3d869db732b@gmail.com","name":"Soumith Chintala"},"message":"...","distinct":true,"url":"https://api.github.com/repos/soumith/fbcunn/commits/fa6048ec9b9eeafd12cee5f81324f355e1f2a198"}] +[{"sha":"defdbe78db98ad69d72f42b09194309f47616592","author":{"email":"5f33e8ddd36b0c849687df732835b9abbe9b347b@twistedmatrix.com","name":"Christopher Armstrong"},"message":"put the auto-generated API docs in the repository so readthedocs will 
work.\\nsigh.","distinct":true,"url":"https://api.github.com/repos/radix/effect/commits/defdbe78db98ad69d72f42b09194309f47616592"}] +\N +[{"sha":"4bb12488d56ea651c56d9688996b464b99095582","author":{"email":"291c18f3fb7528c712d9098b0e50a515ea0b91d5@cloudera.com","name":"Sean Owen"},"message":"SPARK-2757 [BUILD] [STREAMING] Add Mima test for Spark Sink after 1.10 is released\\n\\nRe-enable MiMa for Streaming Flume Sink module, now that 1.1.0 is released, per the JIRA TO-DO. That's pretty much all there is to this.\\n\\nAuthor: Sean Owen \\n\\nCloses #3842 from srowen/SPARK-2757 and squashes the following commits:\\n\\n50ff80e [Sean Owen] Exclude apparent false positive turned up by re-enabling MiMa checks for Streaming Flume Sink\\n0e5ba5c [Sean Owen] Re-enable MiMa for Streaming Flume Sink module","distinct":true,"url":"https://api.github.com/repos/apache/spark/commits/4bb12488d56ea651c56d9688996b464b99095582"}] +\N +[{"sha":"0fb2391e7aabeb57f358d5d51c70c766e6fa00d1","author":{"email":"821f468726cd384db724fde38ddabae6642cf80c@gmail.com","name":"Uygun BODUR"},"message":".","distinct":true,"url":"https://api.github.com/repos/uygunuks/AsalSayiKalibi/commits/0fb2391e7aabeb57f358d5d51c70c766e6fa00d1"}] +[{"sha":"05438aee0e5572a6e4adf91d7eef32917812d9e9","author":{"email":"de163e90d3aeef9f404d1de71c48e234a211e3c3@gmail.com","name":"KT"},"message":"Update","distinct":true,"url":"https://api.github.com/repos/qdm/qdm.github.io/commits/05438aee0e5572a6e4adf91d7eef32917812d9e9"}] +[{"sha":"44753191dc8f615ccda4f0afe31a09342172cfe4","author":{"email":"7fbc091194a9488bfb16868527a7c3a8ba469dba@gmail.com","name":"Seth Carter"},"message":"Wed Dec 31 20:00:02 EST 2014","distinct":true,"url":"https://api.github.com/repos/sundaymtn/waterline/commits/44753191dc8f615ccda4f0afe31a09342172cfe4"}] +\N +\N + diff --git a/regression-test/data/variant_p2/sql/sql08.out b/regression-test/data/variant_p2/sql/sql08.out new file mode 100644 index 0000000000..36e3e60810 --- /dev/null +++ 
b/regression-test/data/variant_p2/sql/sql08.out @@ -0,0 +1,13 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !sql08 -- +{"issue":{"user":{"starred_url":"https://api.github.com/users/YorickPeterse/starred{/owner}{/repo}","url":"https://api.github.com/users/YorickPeterse","repos_url":"https://api.github.com/users/YorickPeterse/repos","events_url":"https://api.github.com/users/YorickPeterse/events{/privacy}","login":"YorickPeterse","avatar_url":"https://avatars.githubusercontent.com/u/86065?v=3","gravatar_id":"","site_admin":0,"html_url":"https://github.com/YorickPeterse","received_events_url":"https://api.github.com/users/YorickPeterse/received_events","followers_url":"https://api.github.com/users/YorickPeterse/followers","following_url":"https://api.github.com/users/YorickPeterse/following{/other_user}","gists_url":"https://api.github.com/users/YorickPeterse/gists{/gist_id}","type":"User","subscriptions_url":"https://api.github.com/users/YorickPeterse/subscriptions","organizations_url":"https://api.github.com/users/YorickPeterse/orgs","id":86065}}} +{"issue":{"user":{"starred_url":"https://api.github.com/users/jzakiya/starred{/owner}{/repo}","url":"https://api.github.com/users/jzakiya","repos_url":"https://api.github.com/users/jzakiya/repos","events_url":"https://api.github.com/users/jzakiya/events{/privacy}","login":"jzakiya","avatar_url":"https://avatars.githubusercontent.com/u/69856?v=3","gravatar_id":"","site_admin":0,"html_url":"https://github.com/jzakiya","received_events_url":"https://api.github.com/users/jzakiya/received_events","followers_url":"https://api.github.com/users/jzakiya/followers","following_url":"https://api.github.com/users/jzakiya/following{/other_user}","gists_url":"https://api.github.com/users/jzakiya/gists{/gist_id}","type":"User","subscriptions_url":"https://api.github.com/users/jzakiya/subscriptions","organizations_url":"https://api.github.com/users/jzakiya/orgs","id":69856}}} 
+{"issue":{"user":{"starred_url":"https://api.github.com/users/altkatz/starred{/owner}{/repo}","url":"https://api.github.com/users/altkatz","repos_url":"https://api.github.com/users/altkatz/repos","events_url":"https://api.github.com/users/altkatz/events{/privacy}","login":"altkatz","avatar_url":"https://avatars.githubusercontent.com/u/4903871?v=3","gravatar_id":"","site_admin":0,"html_url":"https://github.com/altkatz","received_events_url":"https://api.github.com/users/altkatz/received_events","followers_url":"https://api.github.com/users/altkatz/followers","following_url":"https://api.github.com/users/altkatz/following{/other_user}","gists_url":"https://api.github.com/users/altkatz/gists{/gist_id}","type":"User","subscriptions_url":"https://api.github.com/users/altkatz/subscriptions","organizations_url":"https://api.github.com/users/altkatz/orgs","id":4903871}}} +{"issue":{"user":{"starred_url":"https://api.github.com/users/FooBarWidget/starred{/owner}{/repo}","url":"https://api.github.com/users/FooBarWidget","repos_url":"https://api.github.com/users/FooBarWidget/repos","events_url":"https://api.github.com/users/FooBarWidget/events{/privacy}","login":"FooBarWidget","avatar_url":"https://avatars.githubusercontent.com/u/819?v=3","gravatar_id":"","site_admin":0,"html_url":"https://github.com/FooBarWidget","received_events_url":"https://api.github.com/users/FooBarWidget/received_events","followers_url":"https://api.github.com/users/FooBarWidget/followers","following_url":"https://api.github.com/users/FooBarWidget/following{/other_user}","gists_url":"https://api.github.com/users/FooBarWidget/gists{/gist_id}","type":"User","subscriptions_url":"https://api.github.com/users/FooBarWidget/subscriptions","organizations_url":"https://api.github.com/users/FooBarWidget/orgs","id":819}}} 
+{"issue":{"user":{"starred_url":"https://api.github.com/users/undr/starred{/owner}{/repo}","url":"https://api.github.com/users/undr","repos_url":"https://api.github.com/users/undr/repos","events_url":"https://api.github.com/users/undr/events{/privacy}","login":"undr","avatar_url":"https://avatars.githubusercontent.com/u/126763?v=3","gravatar_id":"","site_admin":0,"html_url":"https://github.com/undr","received_events_url":"https://api.github.com/users/undr/received_events","followers_url":"https://api.github.com/users/undr/followers","following_url":"https://api.github.com/users/undr/following{/other_user}","gists_url":"https://api.github.com/users/undr/gists{/gist_id}","type":"User","subscriptions_url":"https://api.github.com/users/undr/subscriptions","organizations_url":"https://api.github.com/users/undr/orgs","id":126763}}} +{"issue":{"user":{"starred_url":"https://api.github.com/users/jc00ke/starred{/owner}{/repo}","url":"https://api.github.com/users/jc00ke","repos_url":"https://api.github.com/users/jc00ke/repos","events_url":"https://api.github.com/users/jc00ke/events{/privacy}","login":"jc00ke","avatar_url":"https://avatars.githubusercontent.com/u/18191?v=3","gravatar_id":"","site_admin":0,"html_url":"https://github.com/jc00ke","received_events_url":"https://api.github.com/users/jc00ke/received_events","followers_url":"https://api.github.com/users/jc00ke/followers","following_url":"https://api.github.com/users/jc00ke/following{/other_user}","gists_url":"https://api.github.com/users/jc00ke/gists{/gist_id}","type":"User","subscriptions_url":"https://api.github.com/users/jc00ke/subscriptions","organizations_url":"https://api.github.com/users/jc00ke/orgs","id":18191}}} 
+{"issue":{"user":{"starred_url":"https://api.github.com/users/ahwatts/starred{/owner}{/repo}","url":"https://api.github.com/users/ahwatts","repos_url":"https://api.github.com/users/ahwatts/repos","events_url":"https://api.github.com/users/ahwatts/events{/privacy}","login":"ahwatts","avatar_url":"https://avatars.githubusercontent.com/u/308758?v=3","gravatar_id":"","site_admin":0,"html_url":"https://github.com/ahwatts","received_events_url":"https://api.github.com/users/ahwatts/received_events","followers_url":"https://api.github.com/users/ahwatts/followers","following_url":"https://api.github.com/users/ahwatts/following{/other_user}","gists_url":"https://api.github.com/users/ahwatts/gists{/gist_id}","type":"User","subscriptions_url":"https://api.github.com/users/ahwatts/subscriptions","organizations_url":"https://api.github.com/users/ahwatts/orgs","id":308758}}} +{"issue":{"user":{"starred_url":"https://api.github.com/users/ryoqun/starred{/owner}{/repo}","url":"https://api.github.com/users/ryoqun","repos_url":"https://api.github.com/users/ryoqun/repos","events_url":"https://api.github.com/users/ryoqun/events{/privacy}","login":"ryoqun","avatar_url":"https://avatars.githubusercontent.com/u/117807?v=3","gravatar_id":"","site_admin":0,"html_url":"https://github.com/ryoqun","received_events_url":"https://api.github.com/users/ryoqun/received_events","followers_url":"https://api.github.com/users/ryoqun/followers","following_url":"https://api.github.com/users/ryoqun/following{/other_user}","gists_url":"https://api.github.com/users/ryoqun/gists{/gist_id}","type":"User","subscriptions_url":"https://api.github.com/users/ryoqun/subscriptions","organizations_url":"https://api.github.com/users/ryoqun/orgs","id":117807}}} 
+{"issue":{"user":{"starred_url":"https://api.github.com/users/ryoqun/starred{/owner}{/repo}","url":"https://api.github.com/users/ryoqun","repos_url":"https://api.github.com/users/ryoqun/repos","events_url":"https://api.github.com/users/ryoqun/events{/privacy}","login":"ryoqun","avatar_url":"https://avatars.githubusercontent.com/u/117807?v=3","gravatar_id":"","site_admin":0,"html_url":"https://github.com/ryoqun","received_events_url":"https://api.github.com/users/ryoqun/received_events","followers_url":"https://api.github.com/users/ryoqun/followers","following_url":"https://api.github.com/users/ryoqun/following{/other_user}","gists_url":"https://api.github.com/users/ryoqun/gists{/gist_id}","type":"User","subscriptions_url":"https://api.github.com/users/ryoqun/subscriptions","organizations_url":"https://api.github.com/users/ryoqun/orgs","id":117807}}} +{"issue":{"user":{"starred_url":"https://api.github.com/users/jc00ke/starred{/owner}{/repo}","url":"https://api.github.com/users/jc00ke","repos_url":"https://api.github.com/users/jc00ke/repos","events_url":"https://api.github.com/users/jc00ke/events{/privacy}","login":"jc00ke","avatar_url":"https://avatars.githubusercontent.com/u/18191?v=3","gravatar_id":"","site_admin":0,"html_url":"https://github.com/jc00ke","received_events_url":"https://api.github.com/users/jc00ke/received_events","followers_url":"https://api.github.com/users/jc00ke/followers","following_url":"https://api.github.com/users/jc00ke/following{/other_user}","gists_url":"https://api.github.com/users/jc00ke/gists{/gist_id}","type":"User","subscriptions_url":"https://api.github.com/users/jc00ke/subscriptions","organizations_url":"https://api.github.com/users/jc00ke/orgs","id":18191}}} + diff --git a/regression-test/data/variant_p2/sql/theLongestRepositoryNames1.out b/regression-test/data/variant_p2/sql/theLongestRepositoryNames1.out new file mode 100644 index 0000000000..bacc785915 --- /dev/null +++ 
b/regression-test/data/variant_p2/sql/theLongestRepositoryNames1.out @@ -0,0 +1,53 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !theLongestRepositoryNames1 -- +1 MicrocontrollersAndMore/OpenCV_Tutorial_11_Object_Detection_and_Tracking_via_SURF_Speeded_Up_Robust_Features_in_Emgu_CV +1 vintagegamingsystems/Disk-Low-Level-Discovery-for-Physical-Disk-within-Windows-Performance-Monitoring-in-Zabbix-2.0 +1 monticeharmon/https-play.google.com-store-music-album-Montice_Harmon_Cold_Winters_Night-id-Baicxouzhmtwlne3p6ls6 +2 Snooker147/sjkfgshdjfgsjhdfgsjhcfasghsjgkwjefriougijdbfjfgqwhfdsjsdhfjhsdgfwfrwghehjdsffffjhgsfsdfjwhegwejhgsjd +1 GITenberg/De-legende-en-de-heldhaftige-vroolijke-en-roemrijke-daden-van-Uilenspiegel-en-Lamme-Goedzak-i__11208 +1 GITenberg/Fifteen-Thousand-Useful-Phrases--13-A-Practical-Handbook-Of-Pertinent-Expressions-Striking-Si__18362 +1 GITenberg/The-Rangers--or-The-Tory-s-Daughter--13-A-Tale-Illustrative-of-the-Revolutionary-History-of-Ve__6947 +2 rtorr/yo_luke_how_have_you_been_is_the_weather_nice_up_payson_I_wonder_how_long_this_can_get_whoa_impressi +1 amigojapan/Arduino-Retro-Computer-with-SD-card-and-LCD-display-and-Keyboard-input-with-BASIC-interpreter +1 bradparks/Exchange_SOAP_example_using_NTLM_and_BASIC_authentication_from_java_for_desktop_and_android +1 amangudan/-Cara-Mendapatkan-Pulsa-Gratis-dengan-Mudah-di-Android-Menggunakan-Aplikasi-Pudding-Points +1 eb0f8345cecce97c9a9d85f635bdd860bf/f86369183b1c3e6ee785a9b7b4181dc5d4a0b70bf8b2775ddc20076e1d43738f +1 ForensicTools/OpenSourceArtifactollectionToolkit-OSACT-475_2141-Cifranic-Mercado-Simmonds-Voellmer +1 USA24/-a-href-https-godoc.org-github.com-google-go-github-img-src-https-godoc.org-github.com-googl +1 amangudan/Cara-Termudah-Mendapatkan-Uang-Dollar-di-Internet-dengan-Aplikasi-Android-secara-Gratis +1 elota/-alt-https-codenvy.com-factory-resources-factory-white.png-https-codenvy.com-ide-resources- +1 
NSCookbook/Recipe-16.2-Populating-a-UITableView-With-Data-From-The-Web---iOS-7-and-AFNetworking +1 Alhesnawi/Car-Door-Lock-System-Control-Via-SMS-Remote-Control-Using-an-Arduino-Microcontroller +1 mastersoftwaresolutions/magento-extension-to-create-different-homepage-for-logged-In-customer +1 visualtecnologicc/git-remote-add-origin-https---github.com-visualtecnologicc-database-oposte +1 AkhilReddy/MITx-6.00.1x-Introduction-to-Computer-Science-and-Programming-Using-Python-Codes +1 FIU-SCIS-Senior-Project-2015-Spring/Smart-Systems-for-Occupancy-and-Building-Energy-Control +2 Hispano/Guia-sobre-Git-Github-y-Metodologia-de-Desarrollo-de-Software-usando-Git-y-Github +1 dimitrisgousios/git-clone-ssh-dimitrisgousios-git.code.sf.net-p-web-box-code-web-box-code +1 vivek73153/IOS-Application-Projects-Demos-Useful-Much-and-Much-All-are-Very-Nice-Projects +1 ap-tech/Getting-started-with-the-TSL1401CL-linescan-camera-with-arduino-and-processing.- +1 fi-content2-games-platform/FIcontent.Gaming.Enabler.RealityMixer.CameraArtifactRendering +1 mahmudahsan/Graph-API---IFrame-Base-Facebook-Application-Development-PHP-SDK-3.0--Source +3 Theofilos-Chamalis/edX-6.00.2x-Introduction-to-Computational-Thinking-and-Data-Science +1 UndeadBaneGitHub/a-very-long-repo-name-here-to-see-how-the-design-behaves-in-this-case +3 mr09gasp/SienaCollegeSoftwareEngineering-2014-15-Team-Documentation---Maroon-Solutions +1 sethwoodworth/The-Jesuit-Relations-and-Allied-Documents-Vol.-V--Quebec-1632-1633_48562 +2 theappguruz/DRAW-LINE-ON-MOUSE-MOVE-AND-DETECT-LINE-COLLISION-IN-UNITY-2D-AND-UNITY-3D +1 brent-humphries/https-help.github.com-assets-images-help-issues-issues_search_bar.png +1 RussellPolitzky/Castle-Windsor-WCF-Service-With-Interceptor-and-Meta-Data-Publishing +1 YaoHuiJi/Professional-Node.js-Building-Javascript-Based-Scalable-Software-in-Chinese +1 antgraf/C--Numbered-Lines-Control-for-RichTextBox--LineNumbersControlForRichTextBox- +1 
harmeetsingh0013/Demo-Project-using-Spring-Security-Oauth2-Thymeleaf-and-Spring-Data +2 anshinfosolz/Advantages-of-Custom-WordPress-Development-of-a-Top-Web-Design-Company +1 hbwjhbwj/https---github.com-mongodb-mongo-java-driver-tree-master-src-main-org-bson +1 larrybuch/d3-things-i-copy-and-paste-from-old-projects-all-the-time-in-one-document +4 GoogleCloudPlatform/solutions-load-balanced-gaming-server-on-google-compute-engine +1 ictoi/transfer-ownership-of-google-drive-files-via-service-account-and-impersonate +1 rohitsinha54/Coursera-Introduction-to-Recommender-Systems-Programming-Assignment-5 +1 senthilakr88/Database--Query-Optimization--Indexer--Parser--and-Interpreter--Java- +1 tvmeubel/Dressoir--tv-meubel-of-tv-meubel-met-design-uitstraling-bij-Zitmaxx-Wonen +1 FrontenderMagazine/javascript-ecma-262-tc39-and-ecmascript-transpilers-explained +1 tegansnyder/Magento-Programmatically-Create-Recurring-Profiles-Authorize.net-CIM +2 Dev-Dipesh/Vagrant-Ansible-NewRelic-setup-for-Node.Js-on-both-Windows-and-Linux +3 FurongHuang/Fast-Detection-of-Overlapping-Communities-via-Online-Tensor-Methods + diff --git a/regression-test/data/variant_p2/sql/theLongestRepositoryNames2.out b/regression-test/data/variant_p2/sql/theLongestRepositoryNames2.out new file mode 100644 index 0000000000..ae941f8e12 --- /dev/null +++ b/regression-test/data/variant_p2/sql/theLongestRepositoryNames2.out @@ -0,0 +1,53 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !theLongestRepositoryNames2 -- +hf/q 2 +mg/i 1 +tj/n 450 +0a-/M 1 +bpq/1 1 +bpq/q 2 +i3/i3 281 +knu/z 1 +kr/fs 6 +kr/hk 2 +kr/s3 9 +m8w/1 1 +m8w/2 1 +nf/dl 7 +rf/nd 4 +rr/rr 7 +ry/hl 70 +s4/s4 1 +sjl/d 8 +sjl/t 23 +sm/sm 2 +tj/bm 3 +tj/co 453 +v8/v8 128 +wq/wq 5 +xtk/X 21 +0a-/0a 4 +0k/oem 1 +18F/C2 2 +4ad/go 15 +550/ca 1 +8l/ged 1 +CTA/PS 1 +Cue/qc 2 +Ink/fp 1 +OCA/hr 2 +OJ/csd 1 +Pana/n 3 +USP/BD 1 +USP/IA 1 +Xe/dev 1 +akr/tb 1 +aoh/ni 1 +apk/ak 1 +ast/cv 1 +avz/pp 2 +bem/bh 8 +bitc/1 1 +bq/zob 1 +bq/zum 6 + diff --git a/regression-test/data/variant_p2/sql/theMostToughCodeReviews.out b/regression-test/data/variant_p2/sql/theMostToughCodeReviews.out new file mode 100644 index 0000000000..e4111f443f --- /dev/null +++ b/regression-test/data/variant_p2/sql/theMostToughCodeReviews.out @@ -0,0 +1,53 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !theMostToughCodeReviews -- +https://github.com/Homebrew/homebrew/pull/ 257 +https://github.com/apache/spark/pull/ 198 +https://github.com/rails/rails/pull/ 171 +https://github.com/rust-lang/rust/pull/ 156 +https://github.com/docker/docker/pull/ 135 +https://github.com/mozilla-b2g/gaia/pull/ 123 +https://github.com/symfony/symfony/pull/ 118 +https://github.com/GoogleCloudPlatform/kubernetes/pull/ 107 +https://github.com/rust-lang/rfcs/pull/ 107 +https://github.com/edx/edx-platform/pull/ 103 +https://github.com/deadlyvipers/dojo_rules/pull/ 85 +https://github.com/odoo/odoo/pull/ 85 +https://github.com/NixOS/nixpkgs/pull/ 84 +https://github.com/django/django/pull/ 84 +https://github.com/iojs/io.js/pull/ 84 +https://github.com/tgstation/-tg-station/pull/ 81 +https://github.com/cms-sw/cmssw/pull/ 76 +https://github.com/laravel/framework/pull/ 71 +https://github.com/dotnet/corefx/pull/ 70 +https://github.com/JuliaLang/julia/pull/ 59 +https://github.com/dotnet/roslyn/pull/ 58 +https://github.com/xbmc/xbmc/pull/ 58 
+https://github.com/php/php-src/pull/ 56 +https://github.com/dolphin-emu/dolphin/pull/ 55 +https://github.com/scikit-learn/scikit-learn/pull/ 54 +https://github.com/puppetlabs/puppet/pull/ 53 +https://github.com/D-Programming-Language/phobos/pull/ 52 +https://github.com/joomla/joomla-cms/pull/ 50 +https://github.com/iojs/website/pull/ 49 +https://github.com/facebook/react/pull/ 48 +https://github.com/angular/angular.js/pull/ 47 +https://github.com/thoughtbot/til/pull/ 47 +https://github.com/dotnet/coreclr/pull/ 46 +https://github.com/sympy/sympy/pull/ 46 +https://github.com/TrinityCore/TrinityCore/pull/ 45 +https://github.com/symfony/symfony-docs/pull/ 45 +https://github.com/caskroom/homebrew-cask/pull/ 44 +https://github.com/ceph/ceph/pull/ 44 +https://github.com/emberjs/ember.js/pull/ 44 +https://github.com/scipy/scipy/pull/ 44 +https://github.com/Wikia/app/pull/ 43 +https://github.com/neovim/neovim/pull/ 43 +https://github.com/CleverRaven/Cataclysm-DDA/pull/ 42 +https://github.com/cakephp/cakephp/pull/ 42 +https://github.com/rapid7/metasploit-framework/pull/ 42 +https://github.com/saltstack/salt/pull/ 42 +https://github.com/ManageIQ/manageiq/pull/ 41 +https://github.com/owncloud/core/pull/ 41 +https://github.com/matplotlib/matplotlib/pull/ 40 +https://github.com/zendframework/zf2/pull/ 40 + diff --git a/regression-test/data/variant_p2/sql/theTotalNumberOfRepositoriesOnGithub.out b/regression-test/data/variant_p2/sql/theTotalNumberOfRepositoriesOnGithub.out new file mode 100644 index 0000000000..40fcc46bc8 --- /dev/null +++ b/regression-test/data/variant_p2/sql/theTotalNumberOfRepositoriesOnGithub.out @@ -0,0 +1,4 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !theTotalNumberOfRepositoriesOnGithub -- +3675646 + diff --git a/regression-test/data/variant_p2/sql/theTotalNumberOfUsersOnGithub1.out b/regression-test/data/variant_p2/sql/theTotalNumberOfUsersOnGithub1.out new file mode 100644 index 0000000000..fe0d72e962 --- /dev/null +++ b/regression-test/data/variant_p2/sql/theTotalNumberOfUsersOnGithub1.out @@ -0,0 +1,4 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !theTotalNumberOfUsersOnGithub1 -- +1737953 + diff --git a/regression-test/data/variant_p2/sql/theTotalNumberOfUsersOnGithub2.out b/regression-test/data/variant_p2/sql/theTotalNumberOfUsersOnGithub2.out new file mode 100644 index 0000000000..717e8e9515 --- /dev/null +++ b/regression-test/data/variant_p2/sql/theTotalNumberOfUsersOnGithub2.out @@ -0,0 +1,4 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !theTotalNumberOfUsersOnGithub2 -- +524456 + diff --git a/regression-test/data/variant_p2/sql/theTotalNumberOfUsersOnGithub3.out b/regression-test/data/variant_p2/sql/theTotalNumberOfUsersOnGithub3.out new file mode 100644 index 0000000000..093f92f976 --- /dev/null +++ b/regression-test/data/variant_p2/sql/theTotalNumberOfUsersOnGithub3.out @@ -0,0 +1,4 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !theTotalNumberOfUsersOnGithub3 -- +975615 + diff --git a/regression-test/data/variant_p2/sql/theTotalNumberOfUsersOnGithub4.out b/regression-test/data/variant_p2/sql/theTotalNumberOfUsersOnGithub4.out new file mode 100644 index 0000000000..3b00e391e1 --- /dev/null +++ b/regression-test/data/variant_p2/sql/theTotalNumberOfUsersOnGithub4.out @@ -0,0 +1,4 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !theTotalNumberOfUsersOnGithub4 -- +228702 + diff --git a/regression-test/data/variant_p2/sql/topRepositoriesByStars.out b/regression-test/data/variant_p2/sql/topRepositoriesByStars.out new file mode 100644 index 0000000000..1221a8714c --- /dev/null +++ b/regression-test/data/variant_p2/sql/topRepositoriesByStars.out @@ -0,0 +1,53 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !topRepositoriesByStars -- +facebook/react-native 10021 +phanan/htaccess 7964 +alex/what-happens-when 7407 +facebook/react 6762 +moklick/frontend-stuff 6528 +prakhar1989/awesome-courses 6110 +yaronn/blessed-contrib 5880 +arasatasaygin/is.js 5498 +vhf/free-programming-books 5437 +tiimgreen/github-cheat-sheet 5420 +gorhill/uBlock 5415 +Flipboard/react-canvas 5187 +h5bp/Front-end-Developer-Interview-Questions 5177 +sdelements/lets-chat 5045 +iojs/io.js 4866 +bendc/frontend-guidelines 4696 +twbs/bootstrap 4475 +getify/You-Dont-Know-JS 4344 +0xAX/linux-insides 4220 +IanLunn/Hover 4102 +dotnet/coreclr 4049 +daniel-lundin/snabbt.js 3996 +angular/angular.js 3942 +Mango/slideout 3866 +twostairs/paperwork 3684 +Dogfalo/materialize 3664 +libreboard/libreboard 3469 +mbostock/d3 3422 +Selz/plyr 3403 +USArmyResearchLab/Dshell 3401 +LeaVerou/awesomplete 3390 +MengTo/Spring 3276 +muut/riotjs 3244 +wasabeef/awesome-android-ui 3225 +airbnb/javascript 3155 +sophron/wifiphisher 3142 +KeyboardFire/mkcast 3134 +google/fonts 3122 +mikechau/react-primer-draft 3044 +jhauswald/sirius 3030 +sbstjn/timesheet.js 3005 +github/gitignore 2926 +torvalds/linux 2907 +IonicaBizau/git-stats 2896 +googlesamples/android-UniversalMusicPlayer 2838 +nwjs/nw.js 2829 +primer/primer 2816 +lukehoban/es6features 2810 +FortAwesome/Font-Awesome 2798 +driftyco/ionic 2783 + diff --git a/regression-test/data/variant_p2/sql/whatIsTheBestDayOfTheWeekToCatchAStar.out 
b/regression-test/data/variant_p2/sql/whatIsTheBestDayOfTheWeekToCatchAStar.out new file mode 100644 index 0000000000..03d42c917e --- /dev/null +++ b/regression-test/data/variant_p2/sql/whatIsTheBestDayOfTheWeekToCatchAStar.out @@ -0,0 +1,10 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !whatIsTheBestDayOfTheWeekToCatchAStar -- +1 413283 +2 595194 +3 656708 +4 618366 +5 606669 +6 606171 +7 454694 + diff --git a/regression-test/data/variant_p2/sql/whoAreAllThosePeopleGivingStars1.out b/regression-test/data/variant_p2/sql/whoAreAllThosePeopleGivingStars1.out new file mode 100644 index 0000000000..e34c40c6ca --- /dev/null +++ b/regression-test/data/variant_p2/sql/whoAreAllThosePeopleGivingStars1.out @@ -0,0 +1,53 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !whoAreAllThosePeopleGivingStars1 -- +mcanthony 6085 +pkt 3437 +kojiwakayama 3388 +maoabc1818 2392 +trnsz 2227 +JT5D 2020 +jiangplus 2008 +tejasmanohar 1946 +xavierdutreilh 1878 +fountainhead 1861 +ARoiD 1610 +nikolay 1401 +Jerzerak 1358 +scratcher28 1349 +xu6148152 1339 +ArtemKulyabin 1240 +yoshuawuyts 1195 +timelyportfolio 1193 +frozzare 1170 +mkhoeini 1106 +kgryte 1098 +nazeeruddinikram 1065 +AUS3RIS 1058 +Doppp 1028 +adolphenom 1009 +marceloboeira 990 +denji 953 +sescobb27 953 +Kaom 950 +se77en 941 +hbm1985 909 +pillowsoft 898 +gkze 891 +kwcto 890 +shunwang 872 +wrjqss 872 +trietptm 864 +hanton 860 +cwonrails 845 +SuriyaaKudoIsc 833 +0x73 820 +gashouse 815 +swhgoon 810 +RincLiu 791 +TetragrammatonHermit 791 +eerie 789 +edersohe 786 +savage69kr 774 +pingjiang 763 +ibmendoza 761 + diff --git a/regression-test/data/variant_p2/sql/whoAreAllThosePeopleGivingStars2.out b/regression-test/data/variant_p2/sql/whoAreAllThosePeopleGivingStars2.out new file mode 100644 index 0000000000..6736766833 --- /dev/null +++ b/regression-test/data/variant_p2/sql/whoAreAllThosePeopleGivingStars2.out @@ -0,0 +1,4 @@ +-- 
This file is automatically generated. You should know what you did if you want to edit this +-- !whoAreAllThosePeopleGivingStars2 -- +cliffordfajardo 98 + diff --git a/regression-test/suites/nereids_syntax_p0/explain.groovy b/regression-test/suites/nereids_syntax_p0/explain.groovy index d5621531c0..f0554473ec 100644 --- a/regression-test/suites/nereids_syntax_p0/explain.groovy +++ b/regression-test/suites/nereids_syntax_p0/explain.groovy @@ -63,7 +63,7 @@ suite("nereids_explain") { when 1>1 then cast(1 as float) else 0.0 end; """ - contains "SlotDescriptor{id=0, col=null, colUniqueId=null, type=DOUBLE, nullable=false, isAutoIncrement=false}" + contains "SlotDescriptor{id=0, col=null, colUniqueId=null, type=DOUBLE, nullable=false, isAutoIncrement=false, subColPath=null}" } def explainStr = sql("select sum(if(lo_tax=1,lo_tax,0)) from lineorder where false").toString() diff --git a/regression-test/suites/variant_github_events_p0/affinityByIssuesAndPRs1.sql b/regression-test/suites/variant_github_events_p0/affinityByIssuesAndPRs1.sql new file mode 100644 index 0000000000..0f89de3394 --- /dev/null +++ b/regression-test/suites/variant_github_events_p0/affinityByIssuesAndPRs1.sql @@ -0,0 +1,14 @@ +SELECT + cast(v:repo.name as string), + count() AS prs, + count(distinct cast(v:actor.login as string)) AS authors +FROM github_events +WHERE (cast(v:type as string) = 'PullRequestEvent') AND (cast(v:payload.action as string) = 'opened') AND (cast(v:actor.login as string) IN +( + SELECT cast(v:actor.login as string) + FROM github_events + WHERE (cast(v:type as string) = 'PullRequestEvent') AND (cast(v:payload.action as string)= 'opened') AND (cast(v:repo.name as string) IN ('rspec/rspec-core', 'golden-warning/giraffedraft-server', 'apache/spark')) +)) AND (lower(cast(v:repo.name as string)) NOT LIKE '%clickhouse%') +GROUP BY cast(v:repo.name as string) +ORDER BY authors DESC, prs DESC, cast(v:repo.name as string) DESC +LIMIT 50 diff --git 
a/regression-test/suites/variant_github_events_p0/affinityByIssuesAndPRs2.sql b/regression-test/suites/variant_github_events_p0/affinityByIssuesAndPRs2.sql new file mode 100644 index 0000000000..913953c271 --- /dev/null +++ b/regression-test/suites/variant_github_events_p0/affinityByIssuesAndPRs2.sql @@ -0,0 +1,14 @@ +SELECT + cast(v:repo.name as string), + count() AS prs, + count(distinct cast(v:actor.login as string)) AS authors +FROM github_events +WHERE (cast(v:type as string) = 'IssuesEvent') AND (cast(v:payload.action as string) = 'opened') AND (cast(v:actor.login as string) IN +( + SELECT cast(v:actor.login as string) + FROM github_events + WHERE (cast(v:type as string) = 'IssuesEvent') AND (cast(v:payload.action as string) = 'opened') AND (cast(v:repo.name as string) IN ('No-CQRT/GooGuns', 'ivolunteerph/ivolunteerph', 'Tribler/tribler')) +)) AND (lower(cast(v:repo.name as string)) NOT LIKE '%clickhouse%') +GROUP BY cast(v:repo.name as string) +ORDER BY authors DESC, prs DESC, cast(v:repo.name as string) ASC +LIMIT 50 diff --git a/regression-test/suites/variant_github_events_p0/authorsWithTheMostPushes.sql b/regression-test/suites/variant_github_events_p0/authorsWithTheMostPushes.sql new file mode 100644 index 0000000000..006a9ae9d3 --- /dev/null +++ b/regression-test/suites/variant_github_events_p0/authorsWithTheMostPushes.sql @@ -0,0 +1,9 @@ +SELECT + cast(v:actor.login as string), + count() AS c, + count(distinct cast(v:repo.name as string)) AS repos + FROM github_events + WHERE cast(v:type as string) = 'PushEvent' + GROUP BY cast(v:actor.login as string) + ORDER BY c DESC, 1, 3 + LIMIT 50 diff --git a/regression-test/suites/variant_github_events_p0/countingStar1.sql b/regression-test/suites/variant_github_events_p0/countingStar1.sql new file mode 100644 index 0000000000..db71c34c4e --- /dev/null +++ b/regression-test/suites/variant_github_events_p0/countingStar1.sql @@ -0,0 +1 @@ +SELECT count() FROM github_events WHERE cast(v:type as string) = 
'WatchEvent' diff --git a/regression-test/suites/variant_github_events_p0/countingStar2.sql b/regression-test/suites/variant_github_events_p0/countingStar2.sql new file mode 100644 index 0000000000..892efa7d72 --- /dev/null +++ b/regression-test/suites/variant_github_events_p0/countingStar2.sql @@ -0,0 +1 @@ +SELECT cast(v:payload.action as string), count() FROM github_events WHERE cast(v:type as string) = 'WatchEvent' GROUP BY cast(v:payload.action as string) \ No newline at end of file diff --git a/regression-test/suites/variant_github_events_p0/countingStar3.sql b/regression-test/suites/variant_github_events_p0/countingStar3.sql new file mode 100644 index 0000000000..9370e01530 --- /dev/null +++ b/regression-test/suites/variant_github_events_p0/countingStar3.sql @@ -0,0 +1 @@ +SELECT count() FROM github_events WHERE cast(v:type as string) = 'WatchEvent' AND cast(v:repo.name as string) IN ('apache/spark', 'GunZi200/Memory-Colour', 'isohuntto/openbay', 'wasabeef/awesome-android-ui') GROUP BY cast(v:payload.action as string) \ No newline at end of file diff --git a/regression-test/suites/variant_github_events_p0/distributionOfRepositoriesByStarCount.sql b/regression-test/suites/variant_github_events_p0/distributionOfRepositoriesByStarCount.sql new file mode 100644 index 0000000000..1e252ef4a7 --- /dev/null +++ b/regression-test/suites/variant_github_events_p0/distributionOfRepositoriesByStarCount.sql @@ -0,0 +1,14 @@ +SELECT + pow(10, floor(log10(c))) AS stars, + count(distinct k) +FROM +( + SELECT + cast(v:repo.name as string) as k, + count() AS c + FROM github_events + WHERE cast(v:type as string) = 'WatchEvent' + GROUP BY cast(v:repo.name as string) +) t +GROUP BY stars +ORDER BY stars ASC diff --git a/regression-test/suites/variant_github_events_p0/githubRoulette.sql b/regression-test/suites/variant_github_events_p0/githubRoulette.sql new file mode 100644 index 0000000000..cda527b69f --- /dev/null +++ 
b/regression-test/suites/variant_github_events_p0/githubRoulette.sql @@ -0,0 +1 @@ +SELECT cast(v:repo.name as string) FROM github_events WHERE cast(v:type as string) = 'WatchEvent' ORDER BY cast(v:created_at as datetime), cast(v:repo.name as string) LIMIT 50 \ No newline at end of file diff --git a/regression-test/suites/variant_github_events_p0/howHasTheListOfTopRepositoriesChangedOverTheYears1.sql b/regression-test/suites/variant_github_events_p0/howHasTheListOfTopRepositoriesChangedOverTheYears1.sql new file mode 100644 index 0000000000..14b2ec2b78 --- /dev/null +++ b/regression-test/suites/variant_github_events_p0/howHasTheListOfTopRepositoriesChangedOverTheYears1.sql @@ -0,0 +1 @@ +SELECT cast(v:repo.name as string), count() AS stars FROM github_events WHERE cast(v:type as string) = 'WatchEvent' AND year(cast(v:created_at as datetime)) = '2015' GROUP BY cast(v:repo.name as string) ORDER BY stars DESC, 1 LIMIT 50 diff --git a/regression-test/suites/variant_github_events_p0/howHasTheListOfTopRepositoriesChangedOverTheYears2.sql b/regression-test/suites/variant_github_events_p0/howHasTheListOfTopRepositoriesChangedOverTheYears2.sql new file mode 100644 index 0000000000..14b2ec2b78 --- /dev/null +++ b/regression-test/suites/variant_github_events_p0/howHasTheListOfTopRepositoriesChangedOverTheYears2.sql @@ -0,0 +1 @@ +SELECT cast(v:repo.name as string), count() AS stars FROM github_events WHERE cast(v:type as string) = 'WatchEvent' AND year(cast(v:created_at as datetime)) = '2015' GROUP BY cast(v:repo.name as string) ORDER BY stars DESC, 1 LIMIT 50 diff --git a/regression-test/suites/variant_github_events_p0/howHasTheListOfTopRepositoriesChangedOverTheYears3.sql b/regression-test/suites/variant_github_events_p0/howHasTheListOfTopRepositoriesChangedOverTheYears3.sql new file mode 100644 index 0000000000..19c4bb0358 --- /dev/null +++ b/regression-test/suites/variant_github_events_p0/howHasTheListOfTopRepositoriesChangedOverTheYears3.sql @@ -0,0 +1 @@ +SELECT 
cast(v:repo.name as string), count() AS stars FROM github_events WHERE cast(v:type as string) = 'WatchEvent' AND year(cast(v:created_at as datetime)) = '2015' GROUP BY cast(v:repo.name as string) ORDER BY stars, cast(v:repo.name as string) DESC LIMIT 50 diff --git a/regression-test/suites/variant_github_events_p0/howHasTheListOfTopRepositoriesChangedOverTheYears4.sql b/regression-test/suites/variant_github_events_p0/howHasTheListOfTopRepositoriesChangedOverTheYears4.sql new file mode 100644 index 0000000000..14b2ec2b78 --- /dev/null +++ b/regression-test/suites/variant_github_events_p0/howHasTheListOfTopRepositoriesChangedOverTheYears4.sql @@ -0,0 +1 @@ +SELECT cast(v:repo.name as string), count() AS stars FROM github_events WHERE cast(v:type as string) = 'WatchEvent' AND year(cast(v:created_at as datetime)) = '2015' GROUP BY cast(v:repo.name as string) ORDER BY stars DESC, 1 LIMIT 50 diff --git a/regression-test/suites/variant_github_events_p0/howHasTheListOfTopRepositoriesChangedOverTheYears5.sql b/regression-test/suites/variant_github_events_p0/howHasTheListOfTopRepositoriesChangedOverTheYears5.sql new file mode 100644 index 0000000000..14b2ec2b78 --- /dev/null +++ b/regression-test/suites/variant_github_events_p0/howHasTheListOfTopRepositoriesChangedOverTheYears5.sql @@ -0,0 +1 @@ +SELECT cast(v:repo.name as string), count() AS stars FROM github_events WHERE cast(v:type as string) = 'WatchEvent' AND year(cast(v:created_at as datetime)) = '2015' GROUP BY cast(v:repo.name as string) ORDER BY stars DESC, 1 LIMIT 50 diff --git a/regression-test/suites/variant_github_events_p0/howHasTheListOfTopRepositoriesChangedOverTheYears6.sql b/regression-test/suites/variant_github_events_p0/howHasTheListOfTopRepositoriesChangedOverTheYears6.sql new file mode 100644 index 0000000000..14b2ec2b78 --- /dev/null +++ b/regression-test/suites/variant_github_events_p0/howHasTheListOfTopRepositoriesChangedOverTheYears6.sql @@ -0,0 +1 @@ +SELECT cast(v:repo.name as string), count() AS 
stars FROM github_events WHERE cast(v:type as string) = 'WatchEvent' AND year(cast(v:created_at as datetime)) = '2015' GROUP BY cast(v:repo.name as string) ORDER BY stars DESC, 1 LIMIT 50 diff --git a/regression-test/suites/variant_github_events_p0/howHasTheListOfTopRepositoriesChangedOverTheYears7.sql b/regression-test/suites/variant_github_events_p0/howHasTheListOfTopRepositoriesChangedOverTheYears7.sql new file mode 100644 index 0000000000..5e4efa99ad --- /dev/null +++ b/regression-test/suites/variant_github_events_p0/howHasTheListOfTopRepositoriesChangedOverTheYears7.sql @@ -0,0 +1,30 @@ +-- FIXME: UNSTABLE +-- SELECT +-- repo, +-- year, +-- cnt +-- FROM +-- ( +-- SELECT +-- row_number() OVER (PARTITION BY year ORDER BY cnt DESC) AS r, +-- repo, +-- year, +-- cnt +-- FROM +-- ( +-- SELECT +-- lower(cast(v:repo.name as string)) AS repo, +-- year(cast(v:created_at as datetime)) AS year, +-- count() AS cnt +-- FROM github_events +-- WHERE (cast(v:type as string) = 'WatchEvent') AND (year(cast(v:created_at as datetime)) >= 2015) +-- GROUP BY +-- repo, +-- year +-- ) t +-- ) t2 +-- WHERE r <= 10 +-- ORDER BY +-- year ASC, +-- cnt DESC, repo +-- \ No newline at end of file diff --git a/regression-test/suites/variant_github_events_p0/howHasTheTotalNumberOfStarsChangedOverTime.sql b/regression-test/suites/variant_github_events_p0/howHasTheTotalNumberOfStarsChangedOverTime.sql new file mode 100644 index 0000000000..97874a2d1e --- /dev/null +++ b/regression-test/suites/variant_github_events_p0/howHasTheTotalNumberOfStarsChangedOverTime.sql @@ -0,0 +1,2 @@ +SELECT year(cast(v:created_at as datetime)) AS year, count() AS stars FROM github_events WHERE cast(v:type as string) = 'WatchEvent' GROUP BY year ORDER BY year + diff --git a/regression-test/suites/variant_github_events_p0/issuesWithTheMostComments1.sql b/regression-test/suites/variant_github_events_p0/issuesWithTheMostComments1.sql new file mode 100644 index 0000000000..9f4e7db0a1 --- /dev/null +++ 
b/regression-test/suites/variant_github_events_p0/issuesWithTheMostComments1.sql @@ -0,0 +1 @@ +SELECT count() FROM github_events WHERE cast(v:type as string) = 'IssueCommentEvent' diff --git a/regression-test/suites/variant_github_events_p0/issuesWithTheMostComments2.sql b/regression-test/suites/variant_github_events_p0/issuesWithTheMostComments2.sql new file mode 100644 index 0000000000..293b171c13 --- /dev/null +++ b/regression-test/suites/variant_github_events_p0/issuesWithTheMostComments2.sql @@ -0,0 +1 @@ +SELECT cast(v:repo.name as string), count() FROM github_events WHERE cast(v:type as string) = 'IssueCommentEvent' GROUP BY cast(v:repo.name as string) ORDER BY count() DESC, 1 LIMIT 50 diff --git a/regression-test/suites/variant_github_events_p0/issuesWithTheMostComments3.sql b/regression-test/suites/variant_github_events_p0/issuesWithTheMostComments3.sql new file mode 100644 index 0000000000..cb1926ad20 --- /dev/null +++ b/regression-test/suites/variant_github_events_p0/issuesWithTheMostComments3.sql @@ -0,0 +1,17 @@ +SELECT + repo_name, + comments, + issues, + round(comments / issues, 2) AS ratio +FROM +( + SELECT + cast(v:repo.name as string) as repo_name, + count() AS comments, + count(distinct cast(v:payload.issue.`number` as int)) AS issues + FROM github_events + WHERE cast(v:type as string) = 'IssueCommentEvent' + GROUP BY cast(v:repo.name as string) +) t +ORDER BY comments DESC, 1, 3, 4 +LIMIT 50 diff --git a/regression-test/suites/variant_github_events_p0/issuesWithTheMostComments4.sql b/regression-test/suites/variant_github_events_p0/issuesWithTheMostComments4.sql new file mode 100644 index 0000000000..c9914fb203 --- /dev/null +++ b/regression-test/suites/variant_github_events_p0/issuesWithTheMostComments4.sql @@ -0,0 +1,9 @@ +SELECT + cast(v:repo.name as string), + cast(v:payload.issue.`number` as int) as number, + count() AS comments +FROM github_events +WHERE cast(v:type as string) = 'IssueCommentEvent' AND (cast(v:payload.action as string) = 
'created') +GROUP BY cast(v:repo.name as string), number +ORDER BY comments DESC, number ASC, 1 +LIMIT 50 diff --git a/regression-test/suites/variant_github_events_p0/issuesWithTheMostComments5.sql b/regression-test/suites/variant_github_events_p0/issuesWithTheMostComments5.sql new file mode 100644 index 0000000000..04a0f61fa0 --- /dev/null +++ b/regression-test/suites/variant_github_events_p0/issuesWithTheMostComments5.sql @@ -0,0 +1,9 @@ +SELECT + cast(v:repo.name as string), + cast(v:payload.issue.`number` as int) as number, + count() AS comments +FROM github_events +WHERE cast(v:type as string) = 'IssueCommentEvent' AND (cast(v:payload.action as string) = 'created') AND (cast(v:payload.issue.`number` as int) > 10) +GROUP BY cast(v:repo.name as string), number +ORDER BY comments DESC, cast(v:repo.name as string), number +LIMIT 50 diff --git a/regression-test/suites/variant_github_events_p0/issuesWithTheMostComments6.sql b/regression-test/suites/variant_github_events_p0/issuesWithTheMostComments6.sql new file mode 100644 index 0000000000..155f6f695f --- /dev/null +++ b/regression-test/suites/variant_github_events_p0/issuesWithTheMostComments6.sql @@ -0,0 +1,11 @@ +SELECT + cast(v:repo.name as string), + cast(v:payload.issue.`number` as int) as number, + count() AS comments, + count(distinct cast(v:actor.login as string)) AS authors +FROM github_events +WHERE cast(v:type as string) = 'IssueCommentEvent' AND (cast(v:payload.action as string) = 'created') AND (cast(v:payload.issue.`number` as int) > 10) +GROUP BY cast(v:repo.name as string), number +HAVING authors >= 4 +ORDER BY comments DESC, cast(v:repo.name as string) +LIMIT 50 diff --git a/regression-test/suites/variant_github_events_p0/issuesWithTheMostComments7.sql b/regression-test/suites/variant_github_events_p0/issuesWithTheMostComments7.sql new file mode 100644 index 0000000000..07b91250aa --- /dev/null +++ b/regression-test/suites/variant_github_events_p0/issuesWithTheMostComments7.sql @@ -0,0 +1,9 @@ 
+SELECT + cast(v:repo.name as string), + count() AS comments, + count(distinct cast(v:actor.login as string)) AS authors +FROM github_events +WHERE cast(v:type as string) = 'CommitCommentEvent' +GROUP BY cast(v:repo.name as string) +ORDER BY count() DESC, 1, 3 +LIMIT 50 diff --git a/regression-test/suites/variant_github_events_p0/issuesWithTheMostComments8.sql b/regression-test/suites/variant_github_events_p0/issuesWithTheMostComments8.sql new file mode 100644 index 0000000000..7a3696f803 --- /dev/null +++ b/regression-test/suites/variant_github_events_p0/issuesWithTheMostComments8.sql @@ -0,0 +1,13 @@ +-- SELECT +-- concat('https://github.com/', cast(v:repo.name as string), '/commit/', cast(v:payload.commit_id as string)) URL, +-- cast(v:payload.commit_id as string) AS commit_id, +-- count() AS comments, +-- count(distinct cast(v:actor.login as string)) AS authors +-- FROM github_events +-- WHERE (cast(v:type as string) = 'CommitCommentEvent') AND commit_id != "" +-- GROUP BY +-- cast(v:repo.name as string), +-- commit_id +-- HAVING authors >= 10 +-- ORDER BY count() DESC, URL, authors +-- LIMIT 50 diff --git a/regression-test/suites/variant_github_events_p0/load.groovy b/regression-test/suites/variant_github_events_p0/load.groovy new file mode 100644 index 0000000000..5dec74a736 --- /dev/null +++ b/regression-test/suites/variant_github_events_p0/load.groovy @@ -0,0 +1,73 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("regression_test_variant_github_events_p0", "variant_type"){ + def load_json_data = {table_name, file_name -> + // load the json data + streamLoad { + table "${table_name}" + + // set http request header params + set 'read_json_by_line', 'true' + set 'format', 'json' + set 'max_filter_ratio', '0.1' + file file_name // import json file + time 10000 // limit inflight 10s + + // if declared a check callback, the default check condition will ignore. + // So you must check all condition + + check { result, exception, startTime, endTime -> + if (exception != null) { + throw exception + } + logger.info("Stream load ${file_name} result: ${result}".toString()) + def json = parseJson(result) + assertEquals("success", json.Status.toLowerCase()) + // assertEquals(json.NumberTotalRows, json.NumberLoadedRows + json.NumberUnselectedRows) + assertTrue(json.NumberLoadedRows > 0 && json.LoadBytes > 0) + } + } + } + + def table_name = "github_events" + sql """DROP TABLE IF EXISTS ${table_name}""" + table_name = "github_events" + sql """ + CREATE TABLE IF NOT EXISTS ${table_name} ( + k bigint, + v variant, + INDEX idx_var(v) USING INVERTED PROPERTIES("parser" = "english") COMMENT '' + ) + DUPLICATE KEY(`k`) + DISTRIBUTED BY HASH(k) BUCKETS 4 + properties("replication_num" = "1", "disable_auto_compaction" = "false"); + """ + // 2015 + load_json_data.call(table_name, """${getS3Url() + '/regression/gharchive.m/2015-01-01-0.json'}""") + load_json_data.call(table_name, """${getS3Url() + '/regression/gharchive.m/2015-01-01-1.json'}""") + 
load_json_data.call(table_name, """${getS3Url() + '/regression/gharchive.m/2015-01-01-2.json'}""") + load_json_data.call(table_name, """${getS3Url() + '/regression/gharchive.m/2015-01-01-3.json'}""") + // 2022 + load_json_data.call(table_name, """${getS3Url() + '/regression/gharchive.m/2022-11-07-16.json'}""") + load_json_data.call(table_name, """${getS3Url() + '/regression/gharchive.m/2022-11-07-10.json'}""") + load_json_data.call(table_name, """${getS3Url() + '/regression/gharchive.m/2022-11-07-22.json'}""") + load_json_data.call(table_name, """${getS3Url() + '/regression/gharchive.m/2022-11-07-23.json'}""") + // TODO fix compaction issue, this case could be stable + qt_sql """select cast(v:payload.pull_request.additions as int) from github_events where cast(v:repo.name as string) = 'xpressengine/xe-core' order by 1;""" + // TODO add test case that some certain columns are materialized in some file while others are not materilized(sparse) +} \ No newline at end of file diff --git a/regression-test/suites/variant_github_events_p0/mostForkedRepositories.sql b/regression-test/suites/variant_github_events_p0/mostForkedRepositories.sql new file mode 100644 index 0000000000..85cf5ef0de --- /dev/null +++ b/regression-test/suites/variant_github_events_p0/mostForkedRepositories.sql @@ -0,0 +1 @@ +SELECT cast(v:repo.name as string), count() AS forks FROM github_events WHERE cast(v:type as string) = 'ForkEvent' GROUP BY cast(v:repo.name as string) ORDER BY forks DESC, 1 LIMIT 50 diff --git a/regression-test/suites/variant_github_events_p0/mostPopularCommentsOnGithub.sql b/regression-test/suites/variant_github_events_p0/mostPopularCommentsOnGithub.sql new file mode 100644 index 0000000000..ff617de4a9 --- /dev/null +++ b/regression-test/suites/variant_github_events_p0/mostPopularCommentsOnGithub.sql @@ -0,0 +1 @@ +SELECT cast(v:payload.comment.body as string), count() FROM github_events WHERE cast(v:payload.comment.body as string) != "" AND length(cast(v:payload.comment.body 
as string)) < 100 GROUP BY cast(v:payload.comment.body as string) ORDER BY count(), 1 DESC LIMIT 50 \ No newline at end of file diff --git a/regression-test/suites/variant_github_events_p0/organizationsByTheNumberOfRepositories.sql b/regression-test/suites/variant_github_events_p0/organizationsByTheNumberOfRepositories.sql new file mode 100644 index 0000000000..b070f0714a --- /dev/null +++ b/regression-test/suites/variant_github_events_p0/organizationsByTheNumberOfRepositories.sql @@ -0,0 +1,14 @@ +SELECT + lower(split_part(repo_name, '/', 1)) AS org, + count(distinct repo_name) AS repos +FROM +( + SELECT cast(v:repo.name as string) as repo_name + FROM github_events + WHERE cast(v:type as string) = 'WatchEvent' + GROUP BY cast(v:repo.name as string) + HAVING count() >= 10 +) t +GROUP BY org +ORDER BY repos DESC, org ASC +LIMIT 50 diff --git a/regression-test/suites/variant_github_events_p0/organizationsByTheNumberOfStars.sql b/regression-test/suites/variant_github_events_p0/organizationsByTheNumberOfStars.sql new file mode 100644 index 0000000000..e842751ba1 --- /dev/null +++ b/regression-test/suites/variant_github_events_p0/organizationsByTheNumberOfStars.sql @@ -0,0 +1,8 @@ +SELECT + lower(split_part(cast(v:repo.name as string), '/', 1)) AS org, + count() AS stars +FROM github_events +WHERE cast(v:type as string) = 'WatchEvent' +GROUP BY org +ORDER BY stars DESC, 1 +LIMIT 50 diff --git a/regression-test/suites/variant_github_events_p0/organizationsByTheSizeOfCommunity.sql b/regression-test/suites/variant_github_events_p0/organizationsByTheSizeOfCommunity.sql new file mode 100644 index 0000000000..8b3ae0baae --- /dev/null +++ b/regression-test/suites/variant_github_events_p0/organizationsByTheSizeOfCommunity.sql @@ -0,0 +1,23 @@ +-- SELECT +-- lower(split_part(cast(v:repo.name as string), '/', 1)) AS org, +-- count(distinct cast(v:actor.login as string)) AS authors, +-- count(distinct pr_author) AS pr_authors, +-- count(distinct issue_author) AS issue_authors, +-- 
count(distinct comment_author) AS comment_authors, +-- count(distinct review_author) AS review_authors, +-- count(distinct push_author) AS push_authors +-- FROM +-- ( +-- SELECT +-- cast(v:repo.name as string), +-- cast(v:actor.login as string), +-- CASE WHEN cast(v:type as string) = 'PullRequestEvent' THEN cast(v:actor.login as string) ELSE NULL END pr_author, +-- CASE WHEN cast(v:type as string) = 'IssuesEvent' THEN cast(v:actor.login as string) ELSE NULL END issue_author, +-- CASE WHEN cast(v:type as string) = 'IssueCommentEvent' THEN cast(v:actor.login as string) ELSE NULL END comment_author, +-- CASE WHEN cast(v:type as string) = 'PullRequestReviewCommentEvent' THEN cast(v:actor.login as string) ELSE NULL END review_author, +-- CASE WHEN cast(v:type as string) = 'PushEvent' THEN cast(v:actor.login as string) ELSE NULL END push_author +-- FROM github_events +-- WHERE cast(v:type as string) IN ('PullRequestEvent', 'IssuesEvent', 'IssueCommentEvent', 'PullRequestReviewCommentEvent', 'PushEvent') +-- ) t +-- GROUP BY org +-- ORDER BY authors DESC diff --git a/regression-test/suites/variant_github_events_p0/proportionsBetweenStarsAndForks1.sql b/regression-test/suites/variant_github_events_p0/proportionsBetweenStarsAndForks1.sql new file mode 100644 index 0000000000..b62668b9fa --- /dev/null +++ b/regression-test/suites/variant_github_events_p0/proportionsBetweenStarsAndForks1.sql @@ -0,0 +1,17 @@ +SELECT + repo_name, + sum(fork) AS forks, + sum(star) AS stars, + round(sum(star) / sum(fork), 3) AS ratio +FROM +( + SELECT + cast(v:repo.name as string) as repo_name, + CASE WHEN cast(v:type as string) = 'ForkEvent' THEN 1 ELSE 0 END AS fork, + CASE WHEN cast(v:type as string) = 'WatchEvent' THEN 1 ELSE 0 END AS star + FROM github_events + WHERE cast(v:type as string) IN ('ForkEvent', 'WatchEvent') +) t +GROUP BY repo_name +ORDER BY forks DESC, 1, 3, 4 +LIMIT 50 diff --git a/regression-test/suites/variant_github_events_p0/proportionsBetweenStarsAndForks2.sql 
b/regression-test/suites/variant_github_events_p0/proportionsBetweenStarsAndForks2.sql new file mode 100644 index 0000000000..fe286c489c --- /dev/null +++ b/regression-test/suites/variant_github_events_p0/proportionsBetweenStarsAndForks2.sql @@ -0,0 +1,18 @@ +SELECT + repo_name, + sum(fork) AS forks, + sum(star) AS stars, + round(sum(star) / sum(fork), 3) AS ratio +FROM +( + SELECT + cast(v:repo.name as string) as repo_name, + CASE WHEN cast(v:type as string) = 'ForkEvent' THEN 1 ELSE 0 END AS fork, + CASE WHEN cast(v:type as string) = 'WatchEvent' THEN 1 ELSE 0 END AS star + FROM github_events + WHERE cast(v:type as string) IN ('ForkEvent', 'WatchEvent') +) t +GROUP BY repo_name +HAVING (stars > 20) AND (forks >= 10) +ORDER BY ratio DESC, repo_name +LIMIT 50 diff --git a/regression-test/suites/variant_github_events_p0/proportionsBetweenStarsAndForks3.sql b/regression-test/suites/variant_github_events_p0/proportionsBetweenStarsAndForks3.sql new file mode 100644 index 0000000000..c8502f59f4 --- /dev/null +++ b/regression-test/suites/variant_github_events_p0/proportionsBetweenStarsAndForks3.sql @@ -0,0 +1,18 @@ +SELECT + repo_name, + sum(fork) AS forks, + sum(star) AS stars, + round(sum(fork) / sum(star), 2) AS ratio +FROM +( + SELECT + cast(v:repo.name as string) as repo_name, + CASE WHEN cast(v:type as string) = 'ForkEvent' THEN 1 ELSE 0 END AS fork, + CASE WHEN cast(v:type as string) = 'WatchEvent' THEN 1 ELSE 0 END AS star + FROM github_events + WHERE cast(v:type as string) IN ('ForkEvent', 'WatchEvent') +) t +GROUP BY repo_name +HAVING (stars > 4) AND (forks > 4) +ORDER BY ratio DESC +LIMIT 50 diff --git a/regression-test/suites/variant_github_events_p0/proportionsBetweenStarsAndForks4.sql b/regression-test/suites/variant_github_events_p0/proportionsBetweenStarsAndForks4.sql new file mode 100644 index 0000000000..f1c405efcd --- /dev/null +++ b/regression-test/suites/variant_github_events_p0/proportionsBetweenStarsAndForks4.sql @@ -0,0 +1,13 @@ +SELECT + 
sum(fork) AS forks, + sum(star) AS stars, + round(sum(star) / sum(fork), 2) AS ratio +FROM +( + SELECT + cast(v:repo.name as string), + CASE WHEN cast(v:type as string) = 'ForkEvent' THEN 1 ELSE 0 END AS fork, + CASE WHEN cast(v:type as string) = 'WatchEvent' THEN 1 ELSE 0 END AS star + FROM github_events + WHERE cast(v:type as string) IN ('ForkEvent', 'WatchEvent') +) t diff --git a/regression-test/suites/variant_github_events_p0/proportionsBetweenStarsAndForks5.sql b/regression-test/suites/variant_github_events_p0/proportionsBetweenStarsAndForks5.sql new file mode 100644 index 0000000000..22606e7eef --- /dev/null +++ b/regression-test/suites/variant_github_events_p0/proportionsBetweenStarsAndForks5.sql @@ -0,0 +1,21 @@ +SELECT + sum(forks) AS forks, + sum(stars) AS stars, + round(sum(stars) / sum(forks), 2) AS ratio +FROM +( + SELECT + sum(fork) AS forks, + sum(star) AS stars + FROM + ( + SELECT + cast(v:repo.name as string) as repo_name, + CASE WHEN cast(v:type as string) = 'ForkEvent' THEN 1 ELSE 0 END AS fork, + CASE WHEN cast(v:type as string) = 'WatchEvent' THEN 1 ELSE 0 END AS star + FROM github_events + WHERE cast(v:type as string) IN ('ForkEvent', 'WatchEvent') + ) t + GROUP BY repo_name + HAVING stars > 10 +) t2 diff --git a/regression-test/suites/variant_github_events_p0/repositoriesByAmountOfModifiedCode.sql b/regression-test/suites/variant_github_events_p0/repositoriesByAmountOfModifiedCode.sql new file mode 100644 index 0000000000..f27a32bbff --- /dev/null +++ b/regression-test/suites/variant_github_events_p0/repositoriesByAmountOfModifiedCode.sql @@ -0,0 +1,12 @@ +SELECT + cast(v:repo.name as string) as repo_name, + count() AS prs, + count(distinct cast(v:actor.login as string)) AS authors, + sum(cast(v:payload.pull_request.additions as int)) AS adds, + sum(cast(v:payload.pull_request.deletions as int)) AS dels +FROM github_events +WHERE (cast(v:type as string) = 'PullRequestEvent') AND (cast(v:payload.action as string) = 'opened') AND 
(cast(v:payload.pull_request.additions as int) < 10000) AND (cast(v:payload.pull_request.deletions as int) < 10000) +GROUP BY repo_name +HAVING (adds / dels) < 10 +ORDER BY adds + dels DESC, 1 +LIMIT 50 diff --git a/regression-test/suites/variant_github_events_p0/repositoriesByTheNumberOfPushes.sql b/regression-test/suites/variant_github_events_p0/repositoriesByTheNumberOfPushes.sql new file mode 100644 index 0000000000..7eb47a6594 --- /dev/null +++ b/regression-test/suites/variant_github_events_p0/repositoriesByTheNumberOfPushes.sql @@ -0,0 +1,17 @@ +SELECT + cast(v:repo.name as string), + count() AS pushes, + count(distinct cast(v:actor.login as string)) AS authors +FROM github_events +WHERE (cast(v:type as string) = 'PushEvent') AND (cast(v:repo.name as string) IN +( + SELECT cast(v:repo.name as string) + FROM github_events + WHERE cast(v:type as string) = 'WatchEvent' + GROUP BY cast(v:repo.name as string) + ORDER BY count() DESC + LIMIT 10000 +)) +GROUP BY cast(v:repo.name as string) +ORDER BY count() DESC, cast(v:repo.name as string) +LIMIT 50 diff --git a/regression-test/suites/variant_github_events_p0/repositoriesWithClickhouse_related_comments1.sql b/regression-test/suites/variant_github_events_p0/repositoriesWithClickhouse_related_comments1.sql new file mode 100644 index 0000000000..af1faa7258 --- /dev/null +++ b/regression-test/suites/variant_github_events_p0/repositoriesWithClickhouse_related_comments1.sql @@ -0,0 +1 @@ +SELECT cast(v:repo.name as string), count() FROM github_events WHERE lower(cast(v:payload.comment.body as string)) LIKE '%apache%' GROUP BY cast(v:repo.name as string) ORDER BY count() DESC, cast(v:repo.name as string) ASC LIMIT 50 \ No newline at end of file diff --git a/regression-test/suites/variant_github_events_p0/repositoriesWithClickhouse_related_comments2.sql b/regression-test/suites/variant_github_events_p0/repositoriesWithClickhouse_related_comments2.sql new file mode 100644 index 0000000000..af974bcf66 --- /dev/null +++ 
b/regression-test/suites/variant_github_events_p0/repositoriesWithClickhouse_related_comments2.sql @@ -0,0 +1,17 @@ +SELECT + repo_name, + sum(num_star) AS num_stars, + sum(num_comment) AS num_comments +FROM +( + SELECT + cast(v:repo.name as string) as repo_name, + CASE WHEN cast(v:type as string) = 'WatchEvent' THEN 1 ELSE 0 END AS num_star, + CASE WHEN lower(cast(v:payload.comment.body as string)) LIKE '%apache%' THEN 1 ELSE 0 END AS num_comment + FROM github_events + WHERE (lower(cast(v:payload.comment.body as string)) LIKE '%apache%') OR (cast(v:type as string) = 'WatchEvent') +) t +GROUP BY repo_name +HAVING num_comments > 0 +ORDER BY num_stars DESC,num_comments DESC, repo_name ASC +LIMIT 50 diff --git a/regression-test/suites/variant_github_events_p0/repositoriesWithDoris_related_comments1.sql b/regression-test/suites/variant_github_events_p0/repositoriesWithDoris_related_comments1.sql new file mode 100644 index 0000000000..7c3aa1931a --- /dev/null +++ b/regression-test/suites/variant_github_events_p0/repositoriesWithDoris_related_comments1.sql @@ -0,0 +1 @@ +SELECT cast(v:repo.name as string), count() FROM github_events WHERE lower(cast(v:payload.comment.body as string)) LIKE '%spark%' GROUP BY cast(v:repo.name as string) ORDER BY count() DESC, cast(v:repo.name as string) ASC LIMIT 50 \ No newline at end of file diff --git a/regression-test/suites/variant_github_events_p0/repositoriesWithDoris_related_comments2.sql b/regression-test/suites/variant_github_events_p0/repositoriesWithDoris_related_comments2.sql new file mode 100644 index 0000000000..76da89584b --- /dev/null +++ b/regression-test/suites/variant_github_events_p0/repositoriesWithDoris_related_comments2.sql @@ -0,0 +1,17 @@ +SELECT + repo_name, + sum(num_star) AS num_stars, + sum(num_comment) AS num_comments +FROM +( + SELECT + cast(v:repo.name as string) as repo_name, + CASE WHEN cast(v:type as string) = 'WatchEvent' THEN 1 ELSE 0 END AS num_star, + CASE WHEN lower(cast(v:payload.comment.body as 
string)) LIKE '%spark%' THEN 1 ELSE 0 END AS num_comment + FROM github_events + WHERE (lower(cast(v:payload.comment.body as string)) LIKE '%spark%') OR (cast(v:type as string) = 'WatchEvent') +) t +GROUP BY repo_name +HAVING num_comments > 0 +ORDER BY num_stars DESC,num_comments DESC,repo_name ASC +LIMIT 50 diff --git a/regression-test/suites/variant_github_events_p0/repositoriesWithTheHighestGrowthYoY.sql b/regression-test/suites/variant_github_events_p0/repositoriesWithTheHighestGrowthYoY.sql new file mode 100644 index 0000000000..e8ba7153a7 --- /dev/null +++ b/regression-test/suites/variant_github_events_p0/repositoriesWithTheHighestGrowthYoY.sql @@ -0,0 +1,20 @@ +SELECT + repo_name, + sum(created_at_2022) AS stars2022, + sum(created_at_2015) AS stars2015, + round(sum(created_at_2022) / sum(created_at_2015), 3) AS yoy, + min(created_at) AS first_seen +FROM +( + SELECT + cast(v:repo.name as string) as repo_name, + CASE year(cast(v:created_at as datetime)) WHEN 2022 THEN 1 ELSE 0 END AS created_at_2022, + CASE year(cast(v:created_at as datetime)) WHEN 2015 THEN 1 ELSE 0 END AS created_at_2015, + cast(v:created_at as datetime) as created_at + FROM github_events + WHERE cast(v:type as string) = 'WatchEvent' +) t +GROUP BY repo_name +HAVING (min(created_at) <= '2023-01-01 00:00:00') AND (stars2022 >= 1) and (stars2015 >= 1) +ORDER BY yoy DESC, repo_name +LIMIT 50 diff --git a/regression-test/suites/variant_github_events_p0/repositoriesWithTheMaximumAmountOfIssues1.sql b/regression-test/suites/variant_github_events_p0/repositoriesWithTheMaximumAmountOfIssues1.sql new file mode 100644 index 0000000000..c308adf423 --- /dev/null +++ b/regression-test/suites/variant_github_events_p0/repositoriesWithTheMaximumAmountOfIssues1.sql @@ -0,0 +1 @@ +SELECT cast(v:repo.name as string), count() AS c, count(distinct cast(v:actor.login as string)) AS u FROM github_events WHERE cast(v:type as string) = 'IssuesEvent' AND cast(v:payload.action as string) = 'opened' GROUP BY 
cast(v:repo.name as string) ORDER BY c DESC, cast(v:repo.name as string) LIMIT 50 diff --git a/regression-test/suites/variant_github_events_p0/repositoriesWithTheMaximumAmountOfIssues2.sql b/regression-test/suites/variant_github_events_p0/repositoriesWithTheMaximumAmountOfIssues2.sql new file mode 100644 index 0000000000..fb977cb817 --- /dev/null +++ b/regression-test/suites/variant_github_events_p0/repositoriesWithTheMaximumAmountOfIssues2.sql @@ -0,0 +1,18 @@ +SELECT + repo_name, + sum(issue_created) AS c, + count(distinct actor_login) AS u, + sum(star) AS stars +FROM +( + SELECT + cast(v:repo.name as string) as repo_name, + CASE WHEN (cast(v:type as string) = 'IssuesEvent') AND (cast(v:payload.action as string) = 'opened') THEN 1 ELSE 0 END AS issue_created, + CASE WHEN cast(v:type as string) = 'WatchEvent' THEN 1 ELSE 0 END AS star, + CASE WHEN (cast(v:type as string) = 'IssuesEvent') AND (cast(v:payload.action as string) = 'opened') THEN cast(v:actor.login as string) ELSE NULL END AS actor_login + FROM github_events + WHERE cast(v:type as string) IN ('IssuesEvent', 'WatchEvent') +) t +GROUP BY repo_name +ORDER BY c DESC, repo_name +LIMIT 50 diff --git a/regression-test/suites/variant_github_events_p0/repositoriesWithTheMaximumAmountOfIssues3.sql b/regression-test/suites/variant_github_events_p0/repositoriesWithTheMaximumAmountOfIssues3.sql new file mode 100644 index 0000000000..370cd1ba45 --- /dev/null +++ b/regression-test/suites/variant_github_events_p0/repositoriesWithTheMaximumAmountOfIssues3.sql @@ -0,0 +1,19 @@ +SELECT + repo_name, + sum(issue_created) AS c, + count(distinct actor_login) AS u, + sum(star) AS stars +FROM +( + SELECT + cast(v:repo.name as string) as repo_name, + CASE WHEN (cast(v:type as string) = 'IssuesEvent') AND (cast(v:payload.action as string) = 'opened') THEN 1 ELSE 0 END AS issue_created, + CASE WHEN cast(v:type as string) = 'WatchEvent' THEN 1 ELSE 0 END AS star, + CASE WHEN (cast(v:type as string) = 'IssuesEvent') AND 
(cast(v:payload.action as string) = 'opened') THEN cast(v:actor.login as string) ELSE NULL END AS actor_login + FROM github_events + WHERE cast(v:type as string) IN ('IssuesEvent', 'WatchEvent') +) t +GROUP BY repo_name +HAVING stars >= 10 +ORDER BY c, u, stars DESC, repo_name +LIMIT 50 diff --git a/regression-test/suites/variant_github_events_p0/repositoriesWithTheMaximumAmountOfIssues4.sql b/regression-test/suites/variant_github_events_p0/repositoriesWithTheMaximumAmountOfIssues4.sql new file mode 100644 index 0000000000..88d7dca5c3 --- /dev/null +++ b/regression-test/suites/variant_github_events_p0/repositoriesWithTheMaximumAmountOfIssues4.sql @@ -0,0 +1,18 @@ +SELECT + repo_name, + sum(issue_created) AS c, + count(distinct actor_login) AS u, + sum(star) AS stars +FROM +( + SELECT + cast(v:repo.name as string) as repo_name, + CASE WHEN (cast(v:type as string) = 'IssuesEvent') AND (cast(v:payload.action as string) = 'opened') THEN 1 ELSE 0 END AS issue_created, + CASE WHEN cast(v:type as string) = 'WatchEvent' THEN 1 ELSE 0 END AS star, + CASE WHEN (cast(v:type as string) = 'IssuesEvent') AND (cast(v:payload.action as string) = 'opened') THEN cast(v:actor.login as string) ELSE NULL END AS actor_login + FROM github_events + WHERE cast(v:type as string) IN ('IssuesEvent', 'WatchEvent') +) t +GROUP BY repo_name +ORDER BY u, c, stars DESC, 1 +LIMIT 50 diff --git a/regression-test/suites/variant_github_events_p0/repositoriesWithTheMaximumAmountOfPullRequests1.sql b/regression-test/suites/variant_github_events_p0/repositoriesWithTheMaximumAmountOfPullRequests1.sql new file mode 100644 index 0000000000..dab00b8130 --- /dev/null +++ b/regression-test/suites/variant_github_events_p0/repositoriesWithTheMaximumAmountOfPullRequests1.sql @@ -0,0 +1 @@ +SELECT cast(v:repo.name as string) as repo_name, count(), count(distinct cast(v:actor.login as string)) FROM github_events WHERE cast(v:type as string) = 'PullRequestEvent' AND cast(v:payload.action as string) = 'opened' GROUP 
BY cast(v:repo.name as string) ORDER BY 2,1,3 DESC LIMIT 50 \ No newline at end of file diff --git a/regression-test/suites/variant_github_events_p0/repositoriesWithTheMaximumAmountOfPullRequests2.sql b/regression-test/suites/variant_github_events_p0/repositoriesWithTheMaximumAmountOfPullRequests2.sql new file mode 100644 index 0000000000..741607d4df --- /dev/null +++ b/regression-test/suites/variant_github_events_p0/repositoriesWithTheMaximumAmountOfPullRequests2.sql @@ -0,0 +1 @@ +SELECT cast(v:repo.name as string), count(), count(distinct cast(v:actor.login as string)) AS u FROM github_events WHERE cast(v:type as string) = 'PullRequestEvent' AND cast(v:payload.action as string) = 'opened' GROUP BY cast(v:repo.name as string) ORDER BY u DESC, 2 DESC, 1 LIMIT 50 diff --git a/regression-test/suites/variant_github_events_p0/repositoriesWithTheMaximumNumberOfAcceptedInvitations.sql b/regression-test/suites/variant_github_events_p0/repositoriesWithTheMaximumNumberOfAcceptedInvitations.sql new file mode 100644 index 0000000000..404b2cf765 --- /dev/null +++ b/regression-test/suites/variant_github_events_p0/repositoriesWithTheMaximumNumberOfAcceptedInvitations.sql @@ -0,0 +1,17 @@ +SELECT + repo_name, + sum(invitation) AS invitations, + sum(star) AS stars +FROM +( + SELECT + cast(v:repo.name as string) as repo_name, + CASE WHEN cast(v:type as string) = 'MemberEvent' THEN 1 ELSE 0 END AS invitation, + CASE WHEN cast(v:type as string) = 'WatchEvent' THEN 1 ELSE 0 END AS star + FROM github_events + WHERE cast(v:type as string) IN ('MemberEvent', 'WatchEvent') +) t +GROUP BY repo_name +HAVING stars >= 2 +ORDER BY invitations DESC, stars DESC, repo_name +LIMIT 50 diff --git a/regression-test/suites/variant_github_events_p0/repositoriesWithTheMostPeopleWhoHavePushAccess1.sql b/regression-test/suites/variant_github_events_p0/repositoriesWithTheMostPeopleWhoHavePushAccess1.sql new file mode 100644 index 0000000000..d94bcb149b --- /dev/null +++ 
b/regression-test/suites/variant_github_events_p0/repositoriesWithTheMostPeopleWhoHavePushAccess1.sql @@ -0,0 +1,13 @@ +SELECT + repo_name, + count(distinct actor_login) AS u, + sum(star) AS stars +FROM +( + SELECT + lower(cast(v:repo.name as string)) as repo_name, + CASE WHEN cast(v:type as string) = 'PushEvent' THEN cast(v:actor.login as string) ELSE NULL END AS actor_login, + CASE WHEN cast(v:type as string) = 'WatchEvent' THEN 1 ELSE 0 END AS star + FROM github_events WHERE cast(v:type as string) IN ('PushEvent', 'WatchEvent') AND cast(v:repo.name as string) != '/' +) t +GROUP BY repo_name ORDER BY u, stars, repo_name DESC LIMIT 50 diff --git a/regression-test/suites/variant_github_events_p0/repositoriesWithTheMostPeopleWhoHavePushAccess2.sql b/regression-test/suites/variant_github_events_p0/repositoriesWithTheMostPeopleWhoHavePushAccess2.sql new file mode 100644 index 0000000000..88a92a06b6 --- /dev/null +++ b/regression-test/suites/variant_github_events_p0/repositoriesWithTheMostPeopleWhoHavePushAccess2.sql @@ -0,0 +1,13 @@ +-- SELECT +-- cast(v:repo.name as string), +-- count(distinct cast(v:actor.login as string)) AS u, +-- sum(star) AS stars +-- FROM +-- ( +-- SELECT +-- cast(v:repo.name as string), +-- CASE WHEN cast(v:type as string) = 'PushEvent' AND (ref LIKE '%/master' OR ref LIKE '%/main') THEN cast(v:actor.login as string) ELSE NULL END AS cast(v:actor.login as string), +-- CASE WHEN cast(v:type as string) = 'WatchEvent' THEN 1 ELSE 0 END AS star +-- FROM github_events WHERE cast(v:type as string) IN ('PushEvent', 'WatchEvent') AND cast(v:repo.name as string) != '/' +-- ) t +-- GROUP BY cast(v:repo.name as string) ORDER BY u DESC LIMIT 50 diff --git a/regression-test/suites/variant_github_events_p0/repositoriesWithTheMostPeopleWhoHavePushAccess3.sql b/regression-test/suites/variant_github_events_p0/repositoriesWithTheMostPeopleWhoHavePushAccess3.sql new file mode 100644 index 0000000000..28498c7a3e --- /dev/null +++ 
b/regression-test/suites/variant_github_events_p0/repositoriesWithTheMostPeopleWhoHavePushAccess3.sql @@ -0,0 +1,16 @@ +-- SELECT +-- cast(v:repo.name as string), +-- count(distinct cast(v:actor.login as string)) AS u, +-- sum(star) AS stars +-- FROM +-- ( +-- SELECT +-- cast(v:repo.name as string), +-- CASE WHEN cast(v:type as string) = 'PushEvent' AND (ref LIKE '%/master' OR ref LIKE '%/main') THEN cast(v:actor.login as string) ELSE NULL END AS cast(v:actor.login as string), +-- CASE WHEN cast(v:type as string) = 'WatchEvent' THEN 1 ELSE 0 END AS star +-- FROM github_events WHERE cast(v:type as string) IN ('PushEvent', 'WatchEvent') AND cast(v:repo.name as string) != '/' +-- ) t +-- GROUP BY cast(v:repo.name as string) +-- HAVING stars >= 100 +-- ORDER BY u DESC +-- LIMIT 50 diff --git a/regression-test/suites/variant_github_events_p0/repositoriesWithTheMostStarsOverOneDay1.sql b/regression-test/suites/variant_github_events_p0/repositoriesWithTheMostStarsOverOneDay1.sql new file mode 100644 index 0000000000..c7b81b2408 --- /dev/null +++ b/regression-test/suites/variant_github_events_p0/repositoriesWithTheMostStarsOverOneDay1.sql @@ -0,0 +1,25 @@ +SELECT + repo_name, + day, + stars +FROM +( + SELECT + row_number() OVER (PARTITION BY repo_name ORDER BY stars DESC) AS rank, + repo_name, + day, + stars + FROM + ( + SELECT + cast(v:repo.name as string) as repo_name, + to_date(cast(v:created_at as datetime)) AS day, + count() AS stars + FROM github_events + WHERE cast(v:type as string) = 'WatchEvent' + GROUP BY cast(v:repo.name as string), day + ) t1 +) t2 +WHERE rank = 1 +ORDER BY stars DESC, 1 +LIMIT 50 diff --git a/regression-test/suites/variant_github_events_p0/repositoriesWithTheMostStarsOverOneDay2.sql b/regression-test/suites/variant_github_events_p0/repositoriesWithTheMostStarsOverOneDay2.sql new file mode 100644 index 0000000000..9f07d2270d --- /dev/null +++ b/regression-test/suites/variant_github_events_p0/repositoriesWithTheMostStarsOverOneDay2.sql @@ -0,0 
+1,25 @@ +-- SELECT +-- cast(v:repo.name as string), +-- day, +-- stars +-- FROM +-- ( +-- SELECT +-- row_number() OVER (PARTITION BY cast(v:repo.name as string) ORDER BY stars DESC) AS rank, +-- cast(v:repo.name as string), +-- day, +-- stars +-- FROM +-- ( +-- SELECT +-- cast(v:repo.name as string), +-- to_date(cast(v:created_at as datetime)) AS day, +-- count() AS stars +-- FROM github_events +-- WHERE cast(v:type as string) = 'WatchEvent' +-- GROUP BY cast(v:repo.name as string), day +-- ) t1 +-- ) t2 +-- WHERE rank = 1 +-- ORDER BY stars DESC +-- LIMIT 50 diff --git a/regression-test/suites/variant_github_events_p0/repositoriesWithTheMostStarsOverOneDay3.sql b/regression-test/suites/variant_github_events_p0/repositoriesWithTheMostStarsOverOneDay3.sql new file mode 100644 index 0000000000..c0ac82e915 --- /dev/null +++ b/regression-test/suites/variant_github_events_p0/repositoriesWithTheMostStarsOverOneDay3.sql @@ -0,0 +1 @@ +-- SELECT cast(v:repo.name as string), cast(v:created_at as datetime), count() AS stars FROM github_events WHERE cast(v:type as string) = 'WatchEvent' GROUP BY cast(v:repo.name as string), cast(v:created_at as datetime) ORDER BY count() DESC LIMIT 50 diff --git a/regression-test/suites/variant_github_events_p0/repositoriesWithTheMostSteadyGrowthOverTime.sql b/regression-test/suites/variant_github_events_p0/repositoriesWithTheMostSteadyGrowthOverTime.sql new file mode 100644 index 0000000000..5c7d2d7c7b --- /dev/null +++ b/regression-test/suites/variant_github_events_p0/repositoriesWithTheMostSteadyGrowthOverTime.sql @@ -0,0 +1,20 @@ +SELECT + repo_name, + max(stars) AS daily_stars, + sum(stars) AS total_stars, + sum(stars) / max(stars) AS rate +FROM +( + SELECT + cast(v:repo.name as string) as repo_name, + to_date(cast(v:created_at as datetime)) AS day, + count() AS stars + FROM github_events + WHERE cast(v:type as string) = 'WatchEvent' + GROUP BY + repo_name, + day +) t +GROUP BY repo_name +ORDER BY rate DESC, 1 +LIMIT 50 diff --git 
a/regression-test/suites/variant_github_events_p0/repositoriesWithTheWorstStagnation_order.sql b/regression-test/suites/variant_github_events_p0/repositoriesWithTheWorstStagnation_order.sql new file mode 100644 index 0000000000..2265f4b5e4 --- /dev/null +++ b/regression-test/suites/variant_github_events_p0/repositoriesWithTheWorstStagnation_order.sql @@ -0,0 +1,20 @@ +SELECT + repo_name, + sum(created_at_2022) AS stars2022, + sum(created_at_2015) AS stars2015, + round(sum(created_at_2022) / sum(created_at_2015), 3) AS yoy, + min(created_at) AS first_seen +FROM +( + SELECT + cast(v:repo.name as string) as repo_name, + CASE year(cast(v:created_at as datetime)) WHEN 2022 THEN 1 ELSE 0 END AS created_at_2022, + CASE year(cast(v:created_at as datetime)) WHEN 2015 THEN 1 ELSE 0 END AS created_at_2015, + cast(v:created_at as datetime) as created_at + FROM github_events + WHERE cast(v:type as string) = 'WatchEvent' +) t +GROUP BY repo_name +HAVING (min(created_at) <= '2019-01-01 00:00:00') AND (max(created_at) >= '2020-06-01 00:00:00') AND (stars2015 >= 2) +ORDER BY yoy, repo_name +LIMIT 50 diff --git a/regression-test/suites/variant_github_events_p0/repositoryAffinityList1.sql b/regression-test/suites/variant_github_events_p0/repositoryAffinityList1.sql new file mode 100644 index 0000000000..99b9f28a2b --- /dev/null +++ b/regression-test/suites/variant_github_events_p0/repositoryAffinityList1.sql @@ -0,0 +1,13 @@ +SELECT + cast(v:repo.name as string) as repo_name, + count() AS stars +FROM github_events +WHERE (cast(v:type as string) = 'WatchEvent') AND (cast(v:actor.login as string) IN +( + SELECT cast(v:actor.login as string) + FROM github_events + WHERE (cast(v:type as string) = 'WatchEvent') AND (cast(v:repo.name as string) IN ('apache/spark', 'prakhar1989/awesome-courses')) +)) AND (cast(v:repo.name as string) NOT IN ('ClickHouse/ClickHouse', 'yandex/ClickHouse')) +GROUP BY repo_name +ORDER BY stars DESC, repo_name +LIMIT 50 diff --git 
a/regression-test/suites/variant_github_events_p0/repositoryAffinityList2.sql b/regression-test/suites/variant_github_events_p0/repositoryAffinityList2.sql new file mode 100644 index 0000000000..4c7f36b518 --- /dev/null +++ b/regression-test/suites/variant_github_events_p0/repositoryAffinityList2.sql @@ -0,0 +1,23 @@ +SELECT + repo_name, + total_stars, + round(spark_stars / total_stars, 2) AS ratio +FROM +( + SELECT + cast(v:repo.name as string) as repo_name, + count(distinct cast(v:actor.login as string)) AS total_stars + FROM github_events + WHERE (cast(v:type as string) = 'WatchEvent') AND (cast(v:repo.name as string) NOT IN ('apache/spark')) + GROUP BY repo_name + HAVING total_stars >= 10 +) t1 +JOIN +( + SELECT + count(distinct cast(v:actor.login as string)) AS spark_stars + FROM github_events + WHERE (cast(v:type as string) = 'WatchEvent') AND (cast(v:repo.name as string) IN ('apache/spark')) +) t2 +ORDER BY ratio DESC, repo_name +LIMIT 50 diff --git a/regression-test/suites/variant_github_events_p0/starsFromHeavyGithubUsers1.sql b/regression-test/suites/variant_github_events_p0/starsFromHeavyGithubUsers1.sql new file mode 100644 index 0000000000..01dfd4f0e8 --- /dev/null +++ b/regression-test/suites/variant_github_events_p0/starsFromHeavyGithubUsers1.sql @@ -0,0 +1,13 @@ +SELECT + cast(v:repo.name as string), + count() +FROM github_events +WHERE (cast(v:type as string) = 'WatchEvent') AND (cast(v:actor.login as string) IN +( + SELECT cast(v:actor.login as string) + FROM github_events + WHERE (cast(v:type as string) = 'PullRequestEvent') AND (cast(v:payload.action as string) = 'opened') +)) +GROUP BY cast(v:repo.name as string) +ORDER BY count() DESC, cast(v:repo.name as string) +LIMIT 50 diff --git a/regression-test/suites/variant_github_events_p0/starsFromHeavyGithubUsers2.sql b/regression-test/suites/variant_github_events_p0/starsFromHeavyGithubUsers2.sql new file mode 100644 index 0000000000..500ae5b9a0 --- /dev/null +++ 
b/regression-test/suites/variant_github_events_p0/starsFromHeavyGithubUsers2.sql @@ -0,0 +1,15 @@ +SELECT + cast(v:repo.name as string), + count() +FROM github_events +WHERE (cast(v:type as string) = 'WatchEvent') AND (cast(v:actor.login as string) IN +( + SELECT cast(v:actor.login as string) + FROM github_events + WHERE (cast(v:type as string) = 'PullRequestEvent') AND (cast(v:payload.action as string) = 'opened') + GROUP BY cast(v:actor.login as string) + HAVING count() >= 2 +)) +GROUP BY cast(v:repo.name as string) +ORDER BY 1, count() DESC, 1 +LIMIT 50 diff --git a/regression-test/suites/variant_github_events_p0/theLongestRepositoryNames1.sql b/regression-test/suites/variant_github_events_p0/theLongestRepositoryNames1.sql new file mode 100644 index 0000000000..0603fec97b --- /dev/null +++ b/regression-test/suites/variant_github_events_p0/theLongestRepositoryNames1.sql @@ -0,0 +1 @@ +SELECT count(), cast(v:repo.name as string) FROM github_events WHERE cast(v:type as string) = 'WatchEvent' GROUP BY cast(v:repo.name as string) ORDER BY length(cast(v:repo.name as string)) DESC, cast(v:repo.name as string) LIMIT 50 diff --git a/regression-test/suites/variant_github_events_p0/theLongestRepositoryNames2.sql b/regression-test/suites/variant_github_events_p0/theLongestRepositoryNames2.sql new file mode 100644 index 0000000000..244f58e3ff --- /dev/null +++ b/regression-test/suites/variant_github_events_p0/theLongestRepositoryNames2.sql @@ -0,0 +1 @@ +SELECT cast(v:repo.name as string), count() FROM github_events WHERE cast(v:type as string) = 'WatchEvent' AND cast(v:repo.name as string) LIKE '%_/_%' GROUP BY cast(v:repo.name as string) ORDER BY length(cast(v:repo.name as string)) ASC, cast(v:repo.name as string) LIMIT 50 diff --git a/regression-test/suites/variant_github_events_p0/theMostToughCodeReviews.sql b/regression-test/suites/variant_github_events_p0/theMostToughCodeReviews.sql new file mode 100644 index 0000000000..46a896f795 --- /dev/null +++ 
b/regression-test/suites/variant_github_events_p0/theMostToughCodeReviews.sql @@ -0,0 +1,10 @@ +SELECT + concat('https://github.com/', cast(v:repo.name as string), '/pull/') AS URL, + count(distinct cast(v:actor.login as string)) AS authors +FROM github_events +WHERE (cast(v:type as string) = 'PullRequestReviewCommentEvent') AND (cast(v:payload.action as string) = 'created') +GROUP BY + cast(v:repo.name as string), + cast(v:payload.issue.`number` as string) +ORDER BY authors DESC, URL ASC +LIMIT 50 \ No newline at end of file diff --git a/regression-test/suites/variant_github_events_p0/theTotalNumberOfRepositoriesOnGithub.sql b/regression-test/suites/variant_github_events_p0/theTotalNumberOfRepositoriesOnGithub.sql new file mode 100644 index 0000000000..1d29c3eeb4 --- /dev/null +++ b/regression-test/suites/variant_github_events_p0/theTotalNumberOfRepositoriesOnGithub.sql @@ -0,0 +1 @@ +SELECT count(distinct cast(v:repo.name as string)) FROM github_events diff --git a/regression-test/suites/variant_github_events_p0/theTotalNumberOfUsersOnGithub1.sql b/regression-test/suites/variant_github_events_p0/theTotalNumberOfUsersOnGithub1.sql new file mode 100644 index 0000000000..044aa0470b --- /dev/null +++ b/regression-test/suites/variant_github_events_p0/theTotalNumberOfUsersOnGithub1.sql @@ -0,0 +1 @@ +SELECT count(distinct cast(v:actor.login as string)) FROM github_events diff --git a/regression-test/suites/variant_github_events_p0/theTotalNumberOfUsersOnGithub2.sql b/regression-test/suites/variant_github_events_p0/theTotalNumberOfUsersOnGithub2.sql new file mode 100644 index 0000000000..12776f1bb2 --- /dev/null +++ b/regression-test/suites/variant_github_events_p0/theTotalNumberOfUsersOnGithub2.sql @@ -0,0 +1 @@ +SELECT count(distinct cast(v:actor.login as string)) FROM github_events WHERE cast(v:type as string) = 'WatchEvent' diff --git a/regression-test/suites/variant_github_events_p0/theTotalNumberOfUsersOnGithub3.sql 
b/regression-test/suites/variant_github_events_p0/theTotalNumberOfUsersOnGithub3.sql new file mode 100644 index 0000000000..8454603277 --- /dev/null +++ b/regression-test/suites/variant_github_events_p0/theTotalNumberOfUsersOnGithub3.sql @@ -0,0 +1 @@ +SELECT count(distinct cast(v:actor.login as string)) FROM github_events WHERE cast(v:type as string) = 'PushEvent' diff --git a/regression-test/suites/variant_github_events_p0/theTotalNumberOfUsersOnGithub4.sql b/regression-test/suites/variant_github_events_p0/theTotalNumberOfUsersOnGithub4.sql new file mode 100644 index 0000000000..384b232f4f --- /dev/null +++ b/regression-test/suites/variant_github_events_p0/theTotalNumberOfUsersOnGithub4.sql @@ -0,0 +1 @@ +SELECT count(distinct cast(v:actor.login as string)) FROM github_events WHERE cast(v:type as string) = 'PullRequestEvent' AND cast(v:payload.action as string) = 'opened' diff --git a/regression-test/suites/variant_github_events_p0/topLabels1.sql b/regression-test/suites/variant_github_events_p0/topLabels1.sql new file mode 100644 index 0000000000..152ec3ba3a --- /dev/null +++ b/regression-test/suites/variant_github_events_p0/topLabels1.sql @@ -0,0 +1,9 @@ +-- SELECT +-- label, +-- count() AS c +-- FROM github_events +-- LATERAL VIEW explode_split(labels, ',') t AS label +-- WHERE (cast(v:type as string) IN ('IssuesEvent', 'PullRequestEvent', 'IssueCommentEvent')) AND (action IN ('created', 'opened', 'labeled')) +-- GROUP BY label +-- ORDER BY c DESC +-- LIMIT 50 diff --git a/regression-test/suites/variant_github_events_p0/topLabels2.sql b/regression-test/suites/variant_github_events_p0/topLabels2.sql new file mode 100644 index 0000000000..bf404d4da5 --- /dev/null +++ b/regression-test/suites/variant_github_events_p0/topLabels2.sql @@ -0,0 +1,9 @@ +-- SELECT +-- label, +-- count() AS c +-- FROM github_events +-- LATERAL VIEW explode_split(labels, ',') t AS label +-- WHERE (cast(v:type as string) IN ('IssuesEvent', 'PullRequestEvent', 'IssueCommentEvent')) AND 
(action IN ('created', 'opened', 'labeled')) AND ((lower(label) LIKE '%bug%') OR (lower(label) LIKE '%feature%')) +-- GROUP BY label +-- ORDER BY c DESC +-- LIMIT 50 diff --git a/regression-test/suites/variant_github_events_p0/topLabels3.sql b/regression-test/suites/variant_github_events_p0/topLabels3.sql new file mode 100644 index 0000000000..49e6086166 --- /dev/null +++ b/regression-test/suites/variant_github_events_p0/topLabels3.sql @@ -0,0 +1,14 @@ +-- SELECT +-- sum(bug) AS bugs, +-- sum(feature) AS feature, +-- sum(bug) / sum(feature) AS ratio +-- FROM +-- ( +-- SELECT +-- CASE WHEN lower(label) LIKE '%bug%' THEN 1 ELSE 0 END AS bug, +-- CASE WHEN lower(label) LIKE '%feature%' THEN 1 ELSE 0 END AS feature +-- FROM github_events +-- LATERAL VIEW explode_split(labels, ',') t AS label +-- WHERE (cast(v:type as string) IN ('IssuesEvent', 'PullRequestEvent', 'IssueCommentEvent')) AND (action IN ('created', 'opened', 'labeled')) AND ((lower(label) LIKE '%bug%') OR (lower(label) LIKE '%feature%')) +-- ) t +-- LIMIT 50 diff --git a/regression-test/suites/variant_github_events_p0/topRepositoriesByStars.sql b/regression-test/suites/variant_github_events_p0/topRepositoriesByStars.sql new file mode 100644 index 0000000000..3dfd35d9d6 --- /dev/null +++ b/regression-test/suites/variant_github_events_p0/topRepositoriesByStars.sql @@ -0,0 +1 @@ +SELECT cast(v:repo.name as string), count() AS stars FROM github_events WHERE cast(v:type as string) = 'WatchEvent' GROUP BY cast(v:repo.name as string) ORDER BY stars DESC, 1 LIMIT 50 diff --git a/regression-test/suites/variant_github_events_p0/whatIsTheBestDayOfTheWeekToCatchAStar.sql b/regression-test/suites/variant_github_events_p0/whatIsTheBestDayOfTheWeekToCatchAStar.sql new file mode 100644 index 0000000000..0c930308cf --- /dev/null +++ b/regression-test/suites/variant_github_events_p0/whatIsTheBestDayOfTheWeekToCatchAStar.sql @@ -0,0 +1 @@ +SELECT dayofweek(cast(v:created_at as datetime)) AS day, count() AS stars FROM 
github_events WHERE cast(v:type as string) = 'WatchEvent' GROUP BY day ORDER BY day diff --git a/regression-test/suites/variant_github_events_p0/whoAreAllThosePeopleGivingStars1.sql b/regression-test/suites/variant_github_events_p0/whoAreAllThosePeopleGivingStars1.sql new file mode 100644 index 0000000000..63a4dfd15f --- /dev/null +++ b/regression-test/suites/variant_github_events_p0/whoAreAllThosePeopleGivingStars1.sql @@ -0,0 +1 @@ +SELECT cast(v:actor.login as string), count() AS stars FROM github_events WHERE cast(v:type as string) = 'WatchEvent' GROUP BY cast(v:actor.login as string) ORDER BY stars DESC, 1 LIMIT 50 diff --git a/regression-test/suites/variant_github_events_p0/whoAreAllThosePeopleGivingStars2.sql b/regression-test/suites/variant_github_events_p0/whoAreAllThosePeopleGivingStars2.sql new file mode 100644 index 0000000000..0aa67b56d2 --- /dev/null +++ b/regression-test/suites/variant_github_events_p0/whoAreAllThosePeopleGivingStars2.sql @@ -0,0 +1 @@ +SELECT cast(v:actor.login as string), count() AS stars FROM github_events WHERE cast(v:type as string) = 'WatchEvent' AND cast(v:actor.login as string) = 'cliffordfajardo' GROUP BY cast(v:actor.login as string) ORDER BY stars DESC LIMIT 50 diff --git a/regression-test/suites/variant_github_events_p0/whoAreAllThosePeopleGivingStars3.sql b/regression-test/suites/variant_github_events_p0/whoAreAllThosePeopleGivingStars3.sql new file mode 100644 index 0000000000..a1e9dab32e --- /dev/null +++ b/regression-test/suites/variant_github_events_p0/whoAreAllThosePeopleGivingStars3.sql @@ -0,0 +1,13 @@ +SELECT + cast(v:repo.name as string), + count() AS stars +FROM github_events +WHERE (cast(v:type as string) = 'WatchEvent') AND (cast(v:repo.name as string) IN +( + SELECT cast(v:repo.name as string) + FROM github_events + WHERE (cast(v:type as string) = 'WatchEvent') AND (cast(v:actor.login as string) = 'cliffordfajardo') +)) +GROUP BY cast(v:repo.name as string) +ORDER BY stars DESC, cast(v:repo.name as string) 
+LIMIT 50 diff --git a/regression-test/suites/variant_p0/column_name.groovy b/regression-test/suites/variant_p0/column_name.groovy new file mode 100644 index 0000000000..ae409409f5 --- /dev/null +++ b/regression-test/suites/variant_p0/column_name.groovy @@ -0,0 +1,46 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +suite("regression_test_variant_column_name", "variant_type"){ + def table_name = "var_column_name" + sql "DROP TABLE IF EXISTS ${table_name}" + sql """ + CREATE TABLE IF NOT EXISTS ${table_name} ( + k bigint, + v variant + ) + DUPLICATE KEY(`k`) + DISTRIBUTED BY HASH(k) BUCKETS 1 + properties("replication_num" = "1", "disable_auto_compaction" = "true"); + """ + + sql """insert into ${table_name} values (1, '{"中文" : "中文", "\\\u4E2C\\\u6587": "unicode"}')""" + qt_sql """select v:中文, v:`\\\u4E2C\\\u6587` from ${table_name}""" + // sql """insert into ${table_name} values (2, '{}')""" + sql "truncate table ${table_name}" + sql """insert into ${table_name} values (3, '{"": ""}')""" + qt_sql """select v:`` from ${table_name} order by k""" + sql """insert into ${table_name} values (4, '{"!@#^&*()": "11111"}')""" + qt_sql """select v:`!@#^&*()` from ${table_name} order by k""" + sql """insert into ${table_name} values (5, '{"123": "456", "789": "012"}')""" + qt_sql """select cast(v:`123` as string) from ${table_name} order by k""" + // sql """insert into ${table_name} values (6, '{"\\n123": "t123", "\\\"123": "123"}')""" + // qt_sql """select v:`\\n` from ${table_name} order by k""" + sql """insert into ${table_name} values (7, '{"AA": "UPPER CASE", "aa": "lower case"}')""" + qt_sql """select cast(v:`AA` as string), cast(v:`aa` as string) from ${table_name} order by k""" + +} \ No newline at end of file diff --git a/regression-test/suites/variant_p0/complexjson.groovy b/regression-test/suites/variant_p0/complexjson.groovy new file mode 100644 index 0000000000..244da16f00 --- /dev/null +++ b/regression-test/suites/variant_p0/complexjson.groovy @@ -0,0 +1,158 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("regression_test_variant_complexjson", "variant_type_complex_json") { + def create_table = { table_name -> + sql "DROP TABLE IF EXISTS ${table_name}" + sql """ + CREATE TABLE IF NOT EXISTS ${table_name} ( + k bigint, + v variant + ) + DUPLICATE KEY(`k`) + DISTRIBUTED BY RANDOM BUCKETS 5 + properties("replication_num" = "1", "disable_auto_compaction" = "true"); + """ + } + table_name = "complexjson" + create_table table_name + sql """insert into ${table_name} values (1, '{ + "id": 1, + "key_0":[ + { + "key_1":[ + { + "key_3":[ + {"key_7":1025,"key_6":25.5,"key_4":1048576,"key_5":0.0001048576}, + {"key_7":2,"key_6":"","key_4":null} + ] + } + ] + }, + { + "key_1":[ + { + "key_3":[ + {"key_7":-922337203685477580.8,"key_6":"aqbjfiruu","key_5":-1}, + {"key_7":65537,"key_6":"","key_4":""} + ] + }, + { + "key_3":[ + {"key_7":21474836.48,"key_4":"ghdqyeiom","key_5":1048575} + ] + } + ] + } + ] + }')""" + // qt_sql """SELECT v:key_0.key_1.key_3.key_4, v:key_0.key_1.key_3.key_5, \ + // v:key_0.key_1.key_3.key_6, v:key_0.key_1.key_3.key_7 FROM ${table_name} ORDER BY v:id""" + qt_sql """SELECT * from ${table_name} order by cast(v:id as int)""" + + table_name = "complexjson2" + create_table table_name + sql """insert into ${table_name} values (1, '{ + "id": 1, + "key_1":[ + { + "key_2":[ + { + "key_3":[ + {"key_8":65537}, + { + "key_4":[ + {"key_5":-0.02}, + {"key_7":1023}, + 
{"key_7":1,"key_6":9223372036854775807} + ] + }, + { + "key_4":[{"key_7":65537,"key_6":null}] + } + ] + } + ] + } + ] + }')""" + // qt_sql """SELECT \ + // v:key_1.key_2.key_3.key_8, \ + // v:key_1.key_2.key_3.key_4.key_5, \ + // v:key_1.key_2.key_3.key_4.key_6, \ + // v:key_1.key_2.key_3.key_4.key_7 \ + // FROM ${table_name} ORDER BY v:id""" + qt_sql """SELECT * from ${table_name} order by cast(v:id as int)""" + + table_name = "complexjson3" + create_table table_name + sql """INSERT INTO ${table_name} VALUES (1, '{"key_10":65536,"key_11":"anve","key_0":{"key_1":{"key_2":1025,"key_3":1},"key_4":1,"key_5":256}}')""" + sql """INSERT INTO ${table_name} VALUES (2, '{"key_0":[{"key_12":"buwvq","key_11":0.0000000255}]}')""" + // qt_sql """SELECT k, v:key_10, v:key_11, v:key_0.key_1.key_2, v:key_0.key_1.key_3, v:key_0.key_4, v:key_0.key_5, v:key_0.key_12, v:key_0.key_11 FROM complexjson3 ORDER BY k;""" + qt_sql """SELECT * from ${table_name} order by k""" + + table_name = "complexjson4" + create_table table_name + sql """INSERT INTO ${table_name} VALUES (1, '{ + "id": 1, + "key_0":[ + {"key_1":{"key_2":[1, 2, 3],"key_8":"sffjx"},"key_10":65535,"key_0":-1}, + {"key_10":10.23,"key_0":922337203.685} + ] + }')""" + // qt_sql """SELECT \ + // v:key_0.key_1.key_2, \ + // v:key_0.key_1.key_8, \ + // v:key_0.key_10, \ + // v:key_0.key_0 \ + // FROM ${table_name} ORDER BY v:id""" + qt_sql """SELECT * from ${table_name} order by cast(v:id as int)""" + + table_name = "complexjson5" + create_table table_name + sql """INSERT INTO ${table_name} VALUES (1, '{ + "id": 1, + "key_0":[ + { + "key_1":[ + { + "key_2": + { + "key_3":[ + {"key_4":255}, + {"key_4":65535}, + {"key_7":255,"key_6":3} + ], + "key_5":[ + {"key_7":"nnpqx","key_6":1}, + {"key_7":255,"key_6":3} + ] + } + } + ] + } + ] + }')""" + // qt_sql """SELECT \ + // v:key_0.key_1.key_2.key_3.key_4, + // v:key_0.key_1.key_2.key_3.key_6, + // v:key_0.key_1.key_2.key_3.key_7, + // v:key_0.key_1.key_2.key_5.key_6, \ + // 
v:key_0.key_1.key_2.key_5.key_7 + // FROM ${table_name} ORDER BY v:id""" + qt_sql """SELECT * from ${table_name} order by cast(v:id as int)""" +} \ No newline at end of file diff --git a/regression-test/suites/variant_p0/delete_update.groovy b/regression-test/suites/variant_p0/delete_update.groovy new file mode 100644 index 0000000000..71047fb751 --- /dev/null +++ b/regression-test/suites/variant_p0/delete_update.groovy @@ -0,0 +1,62 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +suite("regression_test_variant_delete_and_update", "variant_type"){ + // MOR + def table_name = "var_delete_update" + sql "DROP TABLE IF EXISTS ${table_name}" + sql """ + CREATE TABLE IF NOT EXISTS ${table_name} ( + k bigint, + v variant + ) + UNIQUE KEY(`k`) + DISTRIBUTED BY HASH(k) BUCKETS 3 + properties("replication_num" = "1"); + """ + + sql """insert into ${table_name} values (1, '{"a" : 1, "b" : [1], "c": 1.0}')""" + sql """insert into ${table_name} values (2, '{"a" : 2, "b" : [1], "c": 2.0}')""" + sql """insert into ${table_name} values (3, '{"a" : 3, "b" : [3], "c": 3.0}')""" + sql """insert into ${table_name} values (4, '{"a" : 4, "b" : [4], "c": 4.0}')""" + sql """insert into ${table_name} values (5, '{"a" : 5, "b" : [5], "c": 5.0}')""" + + sql "delete from ${table_name} where k = 1" + sql """update ${table_name} set v = '{"updated_value" : 123}' where k = 2""" + qt_sql "select * from ${table_name} order by k" + + // MOW + table_name = "var_delete_update_mow" + sql "DROP TABLE IF EXISTS ${table_name}" + sql """ + CREATE TABLE IF NOT EXISTS ${table_name} ( + k bigint, + v variant, + vs string + ) + UNIQUE KEY(`k`) + DISTRIBUTED BY HASH(k) BUCKETS 3 + properties("replication_num" = "1", "enable_unique_key_merge_on_write" = "true"); + """ + sql "insert into var_delete_update_mow select k, cast(v as string), cast(v as string) from var_delete_update" + sql "delete from ${table_name} where k = 1" + sql "delete from ${table_name} where k in (select k from var_delete_update_mow where k in (3, 4, 5))" + // FIXME + // sql """update ${table_name} set vs = '{"updated_value" : 123}' where k = 2""" + // sql """update ${table_name} set v = '{"updated_value" : 123}' where k = 2""" + qt_sql "select * from ${table_name} order by k" +} \ No newline at end of file diff --git a/regression-test/suites/variant_p0/insert_into_select.groovy b/regression-test/suites/variant_p0/insert_into_select.groovy new file mode 100644 index 0000000000..ffd0c3af0e --- /dev/null +++ 
b/regression-test/suites/variant_p0/insert_into_select.groovy @@ -0,0 +1,52 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("regression_test_variant_insert_into_select", "variant_type"){ + def table_name = "insert_into_select" + sql "DROP TABLE IF EXISTS ${table_name}_var" + sql "DROP TABLE IF EXISTS ${table_name}_str" + sql """ + CREATE TABLE IF NOT EXISTS ${table_name}_var ( + k bigint, + v variant + ) + DUPLICATE KEY(`k`) + DISTRIBUTED BY HASH(k) BUCKETS 3 + properties("replication_num" = "1"); + """ + sql """ + CREATE TABLE IF NOT EXISTS ${table_name}_str ( + k bigint, + v string + ) + DUPLICATE KEY(`k`) + DISTRIBUTED BY HASH(k) BUCKETS 3 + properties("replication_num" = "1"); + """ + + sql """insert into ${table_name}_var values (1, '{"a" : 1, "b" : [1], "c": 1.0}')""" + sql """insert into ${table_name}_var values (2, '{"a" : 2, "b" : [1], "c": 2.0}')""" + sql """insert into ${table_name}_var values (3, '{"a" : 3, "b" : [3], "c": 3.0}')""" + sql """insert into ${table_name}_var values (4, '{"a" : 4, "b" : [4], "c": 4.0}')""" + sql """insert into ${table_name}_var values (5, '{"a" : 5, "b" : [5], "c": 5.0}')""" + + sql """insert into ${table_name}_str select * from ${table_name}_var""" + sql """insert 
into ${table_name}_var select * from ${table_name}_str""" + sql """insert into ${table_name}_var select * from ${table_name}_var""" + qt_sql "select v:a, v:b, v:c from ${table_name}_var order by k" + qt_sql "select v from ${table_name}_str order by k" +} \ No newline at end of file diff --git a/regression-test/suites/variant_p0/load.groovy b/regression-test/suites/variant_p0/load.groovy new file mode 100644 index 0000000000..2310e7605f --- /dev/null +++ b/regression-test/suites/variant_p0/load.groovy @@ -0,0 +1,403 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("regression_test_variant", "variant_type"){ + + def load_json_data = {table_name, file_name -> + // load the json data + streamLoad { + table "${table_name}" + + // set http request header params + set 'read_json_by_line', 'true' + set 'format', 'json' + set 'max_filter_ratio', '0.1' + file file_name // import json file + time 10000 // limit inflight 10s + + // if declared a check callback, the default check condition will ignore. 
+ // So you must check all condition + + check { result, exception, startTime, endTime -> + if (exception != null) { + throw exception + } + logger.info("Stream load ${file_name} result: ${result}".toString()) + def json = parseJson(result) + assertEquals("success", json.Status.toLowerCase()) + // assertEquals(json.NumberTotalRows, json.NumberLoadedRows + json.NumberUnselectedRows) + assertTrue(json.NumberLoadedRows > 0 && json.LoadBytes > 0) + } + } + } + + def verify = { table_name -> + sql "sync" + qt_sql """select count() from ${table_name}""" + } + + def create_table = { table_name, buckets="auto", key_type="DUPLICATE" -> + sql "DROP TABLE IF EXISTS ${table_name}" + sql """ + CREATE TABLE IF NOT EXISTS ${table_name} ( + k bigint, + v variant + ) + ${key_type} KEY(`k`) + DISTRIBUTED BY HASH(k) BUCKETS ${buckets} + properties("replication_num" = "1", "disable_auto_compaction" = "false"); + """ + } + + def set_be_config = { key, value -> + String backend_id; + def backendId_to_backendIP = [:] + def backendId_to_backendHttpPort = [:] + getBackendIpHttpPort(backendId_to_backendIP, backendId_to_backendHttpPort); + + backend_id = backendId_to_backendIP.keySet()[0] + def (code, out, err) = update_be_config(backendId_to_backendIP.get(backend_id), backendId_to_backendHttpPort.get(backend_id), key, value) + logger.info("update config: code=" + code + ", out=" + out + ", err=" + err) + } + + try { + + def key_types = ["DUPLICATE", "UNIQUE"] + for (int i = 0; i < key_types.size(); i++) { + def table_name = "simple_variant_${key_types[i]}" + // 1. 
simple cases + create_table.call(table_name, "auto", key_types[i]) + sql """insert into ${table_name} values (1, '[1]'),(1, '{"a" : 1}');""" + sql """insert into ${table_name} values (2, '[2]'),(1, '{"a" : [[[1]]]}');""" + sql """insert into ${table_name} values (3, '3'),(1, '{"a" : 1}'), (1, '{"a" : [1]}');""" + sql """insert into ${table_name} values (4, '"4"'),(1, '{"a" : "1223"}');""" + sql """insert into ${table_name} values (5, '5.0'),(1, '{"a" : [1]}');""" + sql """insert into ${table_name} values (6, '"[6]"'),(1, '{"a" : ["1", 2, 1.1]}');""" + sql """insert into ${table_name} values (7, '7'),(1, '{"a" : 1, "b" : {"c" : 1}}');""" + sql """insert into ${table_name} values (8, '8.11111'),(1, '{"a" : 1, "b" : {"c" : [{"a" : 1}]}}');""" + sql """insert into ${table_name} values (9, '"9999"'),(1, '{"a" : 1, "b" : {"c" : [{"a" : 1}]}}');""" + sql """insert into ${table_name} values (10, '1000000'),(1, '{"a" : 1, "b" : {"c" : [{"a" : 1}]}}');""" + sql """insert into ${table_name} values (11, '[123.0]'),(1999, '{"a" : 1, "b" : {"c" : 1}}'),(19921, '{"a" : 1, "b" : 10}');""" + sql """insert into ${table_name} values (12, '[123.2]'),(1022, '{"a" : 1, "b" : 10}'),(1029, '{"a" : 1, "b" : {"c" : 1}}');""" + qt_sql "select k, cast(v:a as array) from ${table_name} where size(cast(v:a as array)) > 0 order by k, cast(v as string);" + // cast v:b as int should be correct + // FIXME: unstable, todo use qt_sql + sql "select k, v, cast(v:b as string) from ${table_name} where length(cast(v:b as string)) > 4 order by k, cast(v as string)" + sql "select k, v from ${table_name} order by k, cast(v as string) limit 5" + sql "select v:b, v:b.c, v from ${table_name} order by k,cast(v as string) desc limit 10000;" + sql "select k, v, v:b.c, v:a from ${table_name} where k > 10 order by k desc limit 10000;" + sql "select v:b from ${table_name} where cast(v:b as int) > 0;" + sql "select cast(v:b as string) from ${table_name} order by k" + verify table_name + } + // FIXME + sql "insert into 
simple_variant_DUPLICATE select k, cast(v as string) from simple_variant_UNIQUE;" + + // 2. type confilct cases + def table_name = "type_conflict_resolution" + create_table table_name + sql """insert into ${table_name} values (1, '{"c" : "123"}');""" + sql """insert into ${table_name} values (2, '{"c" : 123}');""" + sql """insert into ${table_name} values (3, '{"cc" : [123]}');""" + sql """insert into ${table_name} values (4, '{"cc" : [123.1]}');""" + sql """insert into ${table_name} values (5, '{"ccc" : 123}');""" + sql """insert into ${table_name} values (6, '{"ccc" : 123321}');""" + sql """insert into ${table_name} values (7, '{"cccc" : 123.0}');""" + sql """insert into ${table_name} values (8, '{"cccc" : 123.11}');""" + sql """insert into ${table_name} values (9, '{"ccccc" : [123]}');""" + sql """insert into ${table_name} values (10, '{"ccccc" : [123456789]}');""" + sql """insert into ${table_name} values (11, '{"b" : 1111111111111111}');""" + sql """insert into ${table_name} values (12, '{"b" : 1.222222}');""" + sql """insert into ${table_name} values (13, '{"bb" : 1}');""" + sql """insert into ${table_name} values (14, '{"bb" : 214748364711}');""" + sql """insert into ${table_name} values (15, '{"A" : 1}');""" + qt_sql """select v from type_conflict_resolution order by k;""" + verify table_name + + // 3. 
simple variant sub column select + table_name = "simple_select_variant" + create_table table_name + sql """insert into ${table_name} values (1, '{"A" : 123}');""" + sql """insert into ${table_name} values (2, '{"A" : 1}');""" + sql """insert into ${table_name} values (4, '{"A" : 123456}');""" + sql """insert into ${table_name} values (8, '{"A" : 123456789101112}');""" + qt_sql_2 "select v:A from ${table_name} order by cast(v:A as int)" + sql """insert into ${table_name} values (12, '{"AA" : [123456]}');""" + sql """insert into ${table_name} values (14, '{"AA" : [123456789101112]}');""" + // qt_sql_3 "select v:AA from ${table_name} where size(v:AA) > 0 order by k" + qt_sql_4 "select v:A, v:AA, v from ${table_name} order by k" + qt_sql_5 "select v:A, v:AA, v, v from ${table_name} where cast(v:A as bigint) > 123 order by k" + + sql """insert into ${table_name} values (16, '{"a" : 123.0, "A" : 191191, "c": 123}');""" + sql """insert into ${table_name} values (18, '{"a" : "123", "c" : 123456}');""" + sql """insert into ${table_name} values (20, '{"a" : 1.10111, "A" : 1800, "c" : [12345]}');""" + // sql """insert into ${table_name} values (12, '{"a" : [123]}, "c": "123456"');""" + sql """insert into ${table_name} values (22, '{"a" : 1.1111, "A" : 17211, "c" : 111111}');""" + sql "sync" + qt_sql_6 "select v:a, v:A from ${table_name} order by cast(v:A as bigint), k" + qt_sql_7 "select k, v:A from ${table_name} where cast(v:A as bigint) >= 1 order by cast(v:A as bigint), k" + + // FIXME: if not cast, then v:a could return "123" or 123 which is none determinately + qt_sql_8 "select cast(v:a as string), v:A from ${table_name} where cast(v:a as json) is null order by k" + // qt_sql_9 "select cast(v:a as string), v:A from ${table_name} where cast(v:A as json) is null order by k" + + // !!! 
Not found cast function String to Float64 + // qt_sql_10 "select v:a, v:A from ${table_name} where cast(v:a as double) > 0 order by k" + qt_sql_11 "select v:A from ${table_name} where cast(v:A as bigint) > 1 order by k" + + // ----%%---- + qt_sql_12 "select v:A, v from ${table_name} where cast(v:A as bigint) > 1 order by k" + // ----%%---- + qt_sql_13 "select v:a, v:A from simple_select_variant where 1=1 and cast(v:a as json) is null and cast(v:A as bigint) >= 1 order by k;" + qt_sql_14 """select v:a, v:A, v from simple_select_variant where cast(v:A as bigint) > 0 and cast(v:A as bigint) = 123456 limit 1;""" + + // !!! Not found cast function String to Float64 + // qt_sql_15 "select v:a, v:A from ${table_name} where 1=1 and cast(v:a as double) > 0 and v:A is not null order by k" + // qt_sql_16 "select v:a, v:A, v:c from ${table_name} where 1=1 and cast(v:a as double) > 0 and v:A is not null order by k" + + // FIXME: if not cast, then v:a could return "123" or 123 which is none determinately + // not stable at present + // qt_sql_17 "select cast(v:a as json), v:A, v, v:AA from simple_select_variant where cast(v:A as bigint) is null order by k;" + + sql """insert into simple_select_variant values (12, '{"oamama": 1.1}')""" + qt_sql_18 "select cast(v:a as text), v:A, v, v:oamama from simple_select_variant where cast(v:oamama as double) is null order by k;" + qt_sql_19 """select v:a, v:A, v, v:oamama from simple_select_variant where cast(v:oamama as double) is not null order by k""" + qt_sql_20 """select v:A from simple_select_variant where cast(v:A as bigint) > 0 and cast(v:A as bigint) = 123456 limit 1;""" + + // !!! 
Not found cast function String to Float64 + // qt_sql_21 """select v:A, v:a, v from simple_select_variant where cast(v:A as bigint) > 0 and cast(v:a as double) > 1 order by cast(v:A as bigint);""" + + sql "truncate table simple_select_variant" + sql """insert into simple_select_variant values (11, '{"x": [123456]}');""" + sql """insert into simple_select_variant values (12, '{"x": [123456789101112]}');""" + sql """insert into simple_select_variant values (12, '{"xxx" : 123, "yyy" : 456}');""" + qt_sql_21_1 """select * from simple_select_variant where cast(v:x as json) is null""" + qt_sql_21_2 """select cast(v:x as json) from simple_select_variant where cast(v:x as json) is not null order by k;""" + + // 4. multi variant in single table + table_name = "multi_variant" + sql "DROP TABLE IF EXISTS ${table_name}" + sql """ + CREATE TABLE IF NOT EXISTS ${table_name} ( + k bigint, + v1 variant, + v2 variant, + v3 variant + + ) + DUPLICATE KEY(`k`) + DISTRIBUTED BY RANDOM BUCKETS 5 + properties("replication_num" = "1", "disable_auto_compaction" = "false"); + """ + sql """insert into ${table_name} values (1, '{"A" : 123}', '{"B" : 123}', '{"C" : 456}');""" + sql """insert into ${table_name} values (2, '{"C" : "123"}', '{"D" : [123]}', '{"E" : 789}');""" + sql """insert into ${table_name} values (3, '{"C" : "123"}', '{"C" : [123]}', '{"E" : "789"}');""" + sql "sync" + verify table_name + qt_sql_22 "select v1:A from multi_variant order by k;" + qt_sql_23 "select v2:D from multi_variant order by k;" + qt_sql_24 "select v2:C from multi_variant order by k;" + + // 5. 
multi tablets concurrent load + table_name = "t_json_parallel" + create_table table_name + sql """INSERT INTO t_json_parallel SELECT *, '{"k1":1, "k2": "some", "k3" : [1234], "k4" : 1.10000, "k5" : [[123]]}' FROM numbers("number" = "50000");""" + qt_sql_25 """ SELECT sum(cast(v:k1 as int)), sum(cast(v:k4 as double)), sum(cast(json_extract(v:k5, "\$.[0].[0]") as int)) from t_json_parallel; """ + //50000 61700000 55000.00000000374 6150000 + // 7. gh data + table_name = "ghdata" + create_table table_name + load_json_data.call(table_name, """${getS3Url() + '/load/ghdata_sample.json'}""") + qt_sql_26 "select count() from ${table_name}" + + // FIXME: this case it not passed + // // 8. json empty string + // // table_name = "empty_string" + // // create_table table_name + // // sql """INSERT INTO empty_string VALUES (1, ''), (2, '{"k1": 1, "k2": "v1"}'), (3, '{}'), (4, '{"k1": 2}');""" + // // sql """INSERT INTO empty_string VALUES (3, null), (4, '{"k1": 1, "k2": "v1"}'), (3, '{}'), (4, '{"k1": 2}');""" + // // qt_sql_27 "SELECT * FROM ${table_name} ORDER BY k;" + + // // // 9. btc data + // // table_name = "btcdata" + // // create_table table_name + // // load_json_data.call(table_name, """${getS3Url() + '/load/btc_transactions.json'}""") + // // qt_sql_28 "select count() from ${table_name}" + + // 10. alter add variant + table_name = "alter_variant" + create_table table_name + sql """INSERT INTO ${table_name} VALUES (1, ''), (1, '{"k1": 1, "k2": "v1"}'), (1, '{}'), (1, '{"k1": 2}');""" + sql "alter table ${table_name} add column v2 variant default null" + sql """INSERT INTO ${table_name} VALUES (1, '{"kyyyy" : "123"}', '{"kxkxkxkx" : [123]}'), (1, '{"kxxxx" : 123}', '{"xxxxyyyy": 123}');""" + qt_sql_29_1 """select * from alter_variant where length(cast(v2 as string)) > 2 and cast(v2 as string) != 'null' order by k, cast(v as string), cast(v2 as string);""" + verify table_name + + // 11. 
boolean values + table_name = "boolean_values" + create_table table_name + sql """INSERT INTO ${table_name} VALUES (1, ''), (2, '{"k1": true, "k2": false}'), (3, '{}'), (4, '{"k1": false}');""" + verify table_name + + // 12. jsonb values + table_name = "jsonb_values" + create_table table_name + sql """insert into ${table_name} values (1, '{"a" : ["123", 123, [123]]}')""" + // FIXME array -> jsonb will parse error + // sql """insert into ${table_name} values (2, '{"a" : ["123"]}')""" + sql """insert into ${table_name} values (3, '{"a" : "123"}')""" + sql """insert into ${table_name} values (4, '{"a" : 123456}')""" + sql """insert into ${table_name} values (5, '{"a" : [123, "123", 1.11111]}')""" + sql """insert into ${table_name} values (6, '{"a" : [123, 1.11, "123"]}')""" + sql """insert into ${table_name} values(7, '{"a" : [123, {"xx" : 1}], "b" : {"c" : 456, "d" : null, "e" : 7.111}}')""" + // FIXME data bellow is invalid at present + // sql """insert into ${table_name} values (8, '{"a" : [123, 111........]}')""" + sql """insert into ${table_name} values (9, '{"a" : [123, {"a" : 1}]}')""" + sql """insert into ${table_name} values (10, '{"a" : [{"a" : 1}, 123]}')""" + qt_sql_29 "select cast(v:a as string) from ${table_name} order by k" + // b? 7.111 [123,{"xx":1}] {"b":{"c":456,"e":7.111}} 456 + qt_sql_30 "select v:b.e, v:a, v:b, v:b.c from jsonb_values where cast(v:b.e as double) > 1;" + + // 13. 
sparse columns + table_name = "sparse_columns" + create_table table_name + sql """insert into sparse_columns select 0, '{"a": 11245, "b" : [123, {"xx" : 1}], "c" : {"c" : 456, "d" : null, "e" : 7.111}}' as json_str + union all select 0, '{"a": 1123}' as json_str union all select 0, '{"a" : 1234, "xxxx" : "kaana"}' as json_str from numbers("number" = "4096") limit 4096 ;""" + qt_sql_30 """ select v from sparse_columns where json_extract(v, "\$") != "{}" order by cast(v as string) limit 10""" + sql "truncate table sparse_columns" + sql """insert into sparse_columns select 0, '{"a": 1123, "b" : [123, {"xx" : 1}], "c" : {"c" : 456, "d" : null, "e" : 7.111}, "zzz" : null, "oooo" : {"akakaka" : null, "xxxx" : {"xxx" : 123}}}' as json_str + union all select 0, '{"a" : 1234, "xxxx" : "kaana", "ddd" : {"aaa" : 123, "mxmxm" : [456, "789"]}}' as json_str from numbers("number" = "4096") limit 4096 ;""" + qt_sql_31 """ select v from sparse_columns where json_extract(v, "\$") != "{}" order by cast(v as string) limit 10""" + sql "truncate table sparse_columns" + + // 12. 
streamload remote file + table_name = "logdata" + create_table.call(table_name, "4") + sql "set enable_two_phase_read_opt = false;" + // no sparse columns + set_be_config.call("variant_ratio_of_defaults_as_sparse_column", "1") + load_json_data.call(table_name, """${getS3Url() + '/load/logdata.json'}""") + qt_sql_32 """ select json_extract(v, "\$.json.parseFailed") from logdata where json_extract(v, "\$.json.parseFailed") != 'null' order by k limit 1;""" + qt_sql_32_1 """select v:json.parseFailed from logdata where cast(v:json.parseFailed as string) is not null and k = 162 limit 1;""" + sql "truncate table ${table_name}" + + // 0.95 default ratio + set_be_config.call("variant_ratio_of_defaults_as_sparse_column", "0.95") + load_json_data.call(table_name, """${getS3Url() + '/load/logdata.json'}""") + qt_sql_33 """ select json_extract(v,"\$.json.parseFailed") from logdata where json_extract(v,"\$.json.parseFailed") != 'null' order by k limit 1;""" + qt_sql_33_1 """select v:json.parseFailed from logdata where cast(v:json.parseFailed as string) is not null and k = 162 limit 1;""" + sql "truncate table ${table_name}" + + // always sparse column + set_be_config.call("variant_ratio_of_defaults_as_sparse_column", "0.85") + load_json_data.call(table_name, """${getS3Url() + '/load/logdata.json'}""") + qt_sql_34 """ select json_extract(v, "\$.json.parseFailed") from logdata where json_extract(v,"\$.json.parseFailed") != 'null' order by k limit 1;""" + sql "truncate table ${table_name}" + qt_sql_35 """select json_extract(v,"\$.json.parseFailed") from logdata where k = 162 and json_extract(v,"\$.json.parseFailed") != 'null';""" + qt_sql_35_1 """select v:json.parseFailed from logdata where cast(v:json.parseFailed as string) is not null and k = 162 limit 1;""" + + // TODO add test case that some certain columns are materialized in some file while others are not materilized(sparse) + // unique table + set_be_config.call("variant_ratio_of_defaults_as_sparse_column", "0.95") + 
table_name = "github_events_unique" + sql """DROP TABLE IF EXISTS ${table_name}""" + table_name = "github_events" + sql """ + CREATE TABLE IF NOT EXISTS ${table_name} ( + k bigint, + v variant + ) + UNIQUE KEY(`k`) + DISTRIBUTED BY HASH(k) BUCKETS 4 + properties("replication_num" = "1", "disable_auto_compaction" = "true"); + """ + load_json_data.call(table_name, """${getS3Url() + '/regression/gharchive.m/2015-01-01-0.json'}""") + sql """insert into ${table_name} values (1, '{"a" : 1}'), (1, '{"a" : 1}')""" + sql """insert into ${table_name} values (2, '{"b" : 1}'), (1, '{"a" : 1}')""" + sql """insert into ${table_name} values (2, '{"c" : 1}'), (1, '{"a" : 1}')""" + sql """insert into ${table_name} values (3, '{"d" : 1}'), (1, '{"a" : 1}')""" + sql """insert into ${table_name} values (3, '{"e" : 1}'), (1, '{"a" : 1}')""" + sql """insert into ${table_name} values (4, '{"f" : 1}'), (1, '{"a" : 1}')""" + sql """insert into ${table_name} values (4, '{"g" : 1}'), (1, '{"a" : 1}')""" + sql """insert into ${table_name} values (5, '{"h" : 1}'), (1, '{"a" : 1}')""" + sql """insert into ${table_name} values (5, '{"i" : 1}'), (1, '{"a" : 1}')""" + sql """insert into ${table_name} values (6, '{"j" : 1}'), (1, '{"a" : 1}')""" + sql """insert into ${table_name} values (6, '{"k" : 1}'), (1, '{"a" : 1}')""" + sql "select * from ${table_name}" + qt_sql_36_1 "select v:a, v:b, v:c from ${table_name} order by k limit 10" + sql "DELETE FROM ${table_name} WHERE k=1" + sql "select * from ${table_name}" + qt_sql_36_2 "select * from ${table_name} where k > 3 order by k desc limit 10" + sql "insert into ${table_name} select * from ${table_name}" + sql """UPDATE ${table_name} set v = '{"updated_value" : 10}' where k = 2""" + qt_sql_36_3 """select * from ${table_name} where k = 2""" + + + // delete sign + load_json_data.call(table_name, """delete.json""") + + // FIXME + // // filter invalid variant + // table_name = "invalid_variant" + // 
set_be_config.call("max_filter_ratio_for_variant_parsing", "1") + // create_table.call(table_name, "4") + // sql """insert into ${table_name} values (1, '{"a" : 1}'), (1, '{"a" 1}')""" + // sql """insert into ${table_name} values (1, '{"a" 1}'), (1, '{"a" 1}')""" + // set_be_config.call("max_filter_ratio_for_variant_parsing", "0.05") + // sql """insert into ${table_name} values (1, '{"a" : 1}'), (1, '{"a" 1}')""" + // sql """insert into ${table_name} values (1, '{"a" 1}'), (1, '{"a" 1}')""" + // sql "select * from ${table_name}" + + // test all sparse columns + set_be_config.call("variant_ratio_of_defaults_as_sparse_column", "0.95") + table_name = "all_sparse_columns" + create_table.call(table_name, "1") + sql """insert into ${table_name} values (1, '{"a" : 1}'), (1, '{"a": "1"}')""" + sql """insert into ${table_name} values (1, '{"a" : 1}'), (1, '{"a": ""}')""" + qt_sql_37 "select * from ${table_name} order by k, cast(v as string)" + set_be_config.call("variant_ratio_of_defaults_as_sparse_column", "0.95") + + // test mow with delete + table_name = "variant_mow" + sql """ + CREATE TABLE IF NOT EXISTS ${table_name} ( + k bigint, + k1 string, + v variant + ) + UNIQUE KEY(`k`) + DISTRIBUTED BY HASH(k) BUCKETS 4 + properties("replication_num" = "1", "disable_auto_compaction" = "false", "enable_unique_key_merge_on_write" = "true"); + """ + sql """insert into ${table_name} values (1, "abc", '{"a" : 1}'), (1, "cde", '{"b" : 1}')""" + sql """insert into ${table_name} values (2, "abe", '{"c" : 1}')""" + sql """insert into ${table_name} values (3, "abd", '{"d" : 1}')""" + sql "delete from ${table_name} where k in (select k from variant_mow where k in (1, 2))" + qt_sql_38 "select * from ${table_name} order by k" + + // read text from sparse col + set_be_config.call("variant_ratio_of_defaults_as_sparse_column", "0.95") + sql """insert into sparse_columns select 0, '{"a": 1123, "b" : [123, {"xx" : 1}], "c" : {"c" : 456, "d" : null, "e" : 7.111}, "zzz" : null, "oooo" : 
{"akakaka" : null, "xxxx" : {"xxx" : 123}}}' as json_str + union all select 0, '{"a" : 1234, "xxxx" : "kaana", "ddd" : {"aaa" : 123, "mxmxm" : [456, "789"]}}' as json_str from numbers("number" = "4096") limit 4096 ;""" + qt_sql_31 """select cast(v:xxxx as string) from sparse_columns where cast(v:xxxx as string) != 'null' limit 1;""" + sql "truncate table sparse_columns" + set_be_config.call("variant_ratio_of_defaults_as_sparse_column", "0.95") + } finally { + // reset flags + set_be_config.call("variant_ratio_of_defaults_as_sparse_column", "0.95") + } +} diff --git a/regression-test/suites/variant_p0/multi_var.groovy b/regression-test/suites/variant_p0/multi_var.groovy new file mode 100644 index 0000000000..1a034b9be3 --- /dev/null +++ b/regression-test/suites/variant_p0/multi_var.groovy @@ -0,0 +1,43 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +suite("regression_test_variant_multi_var", "variant_type"){ + def table_name = "multi_variants" + sql "DROP TABLE IF EXISTS ${table_name}" + sql """ + CREATE TABLE IF NOT EXISTS ${table_name} ( + k bigint, + v variant + ) + DUPLICATE KEY(`k`) + DISTRIBUTED BY HASH(k) BUCKETS 4 + properties("replication_num" = "1"); + """ + sql """INSERT INTO ${table_name} SELECT *, '{"k1":1, "k2": "hello world", "k3" : [1234], "k4" : 1.10000, "k5" : [[123]]}' FROM numbers("number" = "101")""" + sql """INSERT INTO ${table_name} SELECT *, '{"k7":123, "k8": "elden ring", "k9" : 1.1112, "k10" : [1.12], "k11" : ["moon"]}' FROM numbers("number" = "203") where number > 100""" + sql """INSERT INTO ${table_name} SELECT *, '{"k7":123, "k8": "elden ring", "k9" : 1.1112, "k10" : [1.12], "k11" : ["moon"]}' FROM numbers("number" = "411") where number > 200""" + sql "alter table ${table_name} add column v2 variant default null" + sql """INSERT INTO ${table_name} select k, v, v from ${table_name}""" + sql "alter table ${table_name} add column v3 variant default null" + sql """INSERT INTO ${table_name} select k, v, v, v from ${table_name}""" + sql "alter table ${table_name} add column ss string default null" + sql """INSERT INTO ${table_name} select k, v, v, v, v from ${table_name}""" + sql """DELETE FROM ${table_name} where k = 1""" + qt_sql """select cast(v:k1 as tinyint), cast(v2:k2 as text), cast(v3:k3 as string), cast(v:k7 as tinyint), cast(v2:k8 as text), cast(v3:k9 as double) from ${table_name} order by k, 1, 2, 3, 4, 5, 6 limit 10""" + qt_sql """select cast(v:k1 as tinyint), cast(v2:k2 as text), cast(v3:k3 as string), cast(v:k7 as tinyint), cast(v2:k8 as text), cast(v3:k9 as double) from ${table_name} where k > 200 order by k, 1, 2, 3, 4, 5, 6 limit 10""" + qt_sql """select cast(v:k1 as tinyint), cast(v2:k2 as text), cast(v3:k3 as string), cast(v:k7 as tinyint), cast(v2:k8 as text), cast(v3:k9 as double) from ${table_name} where k > 300 order by k, 1, 2, 3, 4, 5, 6 limit 10""" +} \ No 
newline at end of file diff --git a/regression-test/suites/variant_p0/schema_change.groovy b/regression-test/suites/variant_p0/schema_change.groovy new file mode 100644 index 0000000000..9a9048ba5d --- /dev/null +++ b/regression-test/suites/variant_p0/schema_change.groovy @@ -0,0 +1,76 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +suite("regression_test_variant_schema_change", "variant_type"){ + def table_name = "variant_schema_change" + sql "DROP TABLE IF EXISTS ${table_name}" + sql """ + CREATE TABLE IF NOT EXISTS ${table_name} ( + k bigint, + v variant + ) + DUPLICATE KEY(`k`) + DISTRIBUTED BY HASH(k) BUCKETS 4 + properties("replication_num" = "1"); + """ + def timeout = 60000 + def delta_time = 1000 + def useTime = 0 + def wait_for_latest_op_on_table_finish = { tableName, OpTimeout -> + for(int t = delta_time; t <= OpTimeout; t += delta_time){ + alter_res = sql """SHOW ALTER TABLE COLUMN WHERE TableName = "${tableName}" ORDER BY CreateTime DESC LIMIT 1;""" + alter_res = alter_res.toString() + if(alter_res.contains("FINISHED")) { + sleep(3000) // wait change table state to normal + logger.info(tableName + " latest alter job finished, detail: " + alter_res) + break + } + useTime = t + sleep(delta_time) + } + assertTrue(useTime <= OpTimeout, "wait_for_latest_op_on_table_finish timeout") + } + // add, drop columns + sql """INSERT INTO ${table_name} SELECT *, '{"k1":1, "k2": "hello world", "k3" : [1234], "k4" : 1.10000, "k5" : [[123]]}' FROM numbers("number" = "4096")""" + sql "alter table ${table_name} add column v2 variant default null" + sql """INSERT INTO ${table_name} SELECT k, v, v from ${table_name}""" + sql "alter table ${table_name} drop column v2" + sql """INSERT INTO ${table_name} SELECT k, v from ${table_name}""" + qt_sql """select v:k1 from ${table_name} order by k limit 10""" + sql "alter table ${table_name} add column vs string default null" + sql """INSERT INTO ${table_name} SELECT k, v, v from ${table_name}""" + qt_sql """select v:k1 from ${table_name} order by k desc limit 10""" + + // add, drop index + sql "alter table ${table_name} add index btm_idxk (k) using bitmap ;" + sql """INSERT INTO ${table_name} SELECT k, v, v from ${table_name}""" + wait_for_latest_op_on_table_finish(table_name, timeout) + // drop column is linked schema change + sql "drop index btm_idxk on 
${table_name};" + sql """INSERT INTO ${table_name} SELECT k, v, v from ${table_name} limit 1024""" + wait_for_latest_op_on_table_finish(table_name, timeout) + qt_sql """select v:k1 from ${table_name} order by k desc limit 10""" + + // add, drop materialized view + createMV("""create materialized view var_order as select vs, k, v from ${table_name} order by vs""") + sql """INSERT INTO ${table_name} SELECT k, v, v from ${table_name} limit 4096""" + createMV("""create materialized view var_cnt as select k, count(k) from ${table_name} group by k""") + sql """INSERT INTO ${table_name} SELECT k, v, v from ${table_name} limit 8101""" + sql """DROP MATERIALIZED VIEW var_cnt ON ${table_name}""" + sql """INSERT INTO ${table_name} SELECT k, v,v from ${table_name} limit 1111""" + qt_sql """select v:k1, cast(v:k2 as string) from ${table_name} order by k desc limit 10""" +} \ No newline at end of file diff --git a/regression-test/suites/variant_p0/sql/gh_data.sql b/regression-test/suites/variant_p0/sql/gh_data.sql new file mode 100644 index 0000000000..72a3adb01c --- /dev/null +++ b/regression-test/suites/variant_p0/sql/gh_data.sql @@ -0,0 +1,13 @@ +set exec_mem_limit=8G; +SELECT count() from ghdata; +SELECT cast(v:repo.name as string), count() AS stars FROM ghdata WHERE cast(v:type as string) = 'WatchEvent' GROUP BY cast(v:repo.name as string) ORDER BY stars DESC, cast(v:repo.name as string) LIMIT 5; +SELECT max(cast(cast(v:`id` as string) as bigint)) FROM ghdata; +SELECT sum(cast(cast(v:`id` as string) as bigint)) FROM ghdata; +SELECT sum(cast(v:payload.member.id as bigint)) FROM ghdata; +SELECT sum(cast(v:payload.pull_request.milestone.creator.site_admin as bigint)) FROM ghdata; +SELECT sum(length(v:payload.pull_request.base.repo.html_url)) FROM ghdata; +-- SELECT v:payload.commits.author.name FROM ghdata ORDER BY k LIMIT 10; +SELECT v:payload.member.id FROM ghdata where cast(v:payload.member.id as string) is not null ORDER BY k LIMIT 10; +-- select k, 
v:payload.commits.author.name AS name, e FROM ghdata as t lateral view explode(cast(v:payload.commits.author.name as array)) tm1 as e order by k limit 5; +select k, v from ghdata WHERE cast(v:type as string) = 'WatchEvent' order by k limit 10; +SELECT cast(v:payload.member.id as bigint), count() FROM ghdata where cast(v:payload.member.id as bigint) is not null group by cast(v:payload.member.id as bigint) order by 1, 2 desc LIMIT 10; \ No newline at end of file diff --git a/regression-test/suites/variant_p0/with_index/load.groovy b/regression-test/suites/variant_p0/with_index/load.groovy new file mode 100644 index 0000000000..e124629169 --- /dev/null +++ b/regression-test/suites/variant_p0/with_index/load.groovy @@ -0,0 +1,101 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +suite("regression_test_variant_with_index", "nonConcurrent"){ + def set_be_config = { key, value -> + String backend_id; + def backendId_to_backendIP = [:] + def backendId_to_backendHttpPort = [:] + getBackendIpHttpPort(backendId_to_backendIP, backendId_to_backendHttpPort); + + backend_id = backendId_to_backendIP.keySet()[0] + def (code, out, err) = update_be_config(backendId_to_backendIP.get(backend_id), backendId_to_backendHttpPort.get(backend_id), key, value) + logger.info("update config: code=" + code + ", out=" + out + ", err=" + err) + } + + def delta_time = 1000 + def useTime = 0 + def wait_for_latest_op_on_table_finish = { tableName, OpTimeout -> + for(int t = delta_time; t <= OpTimeout; t += delta_time){ + alter_res = sql """SHOW ALTER TABLE COLUMN WHERE TableName = "${tableName}" ORDER BY CreateTime DESC LIMIT 1;""" + alter_res = alter_res.toString() + if(alter_res.contains("FINISHED")) { + sleep(3000) // wait change table state to normal + logger.info(tableName + " latest alter job finished, detail: " + alter_res) + break + } + useTime = t + sleep(delta_time) + } + assertTrue(useTime <= OpTimeout, "wait_for_latest_op_on_table_finish timeout") + } + set_be_config.call("variant_ratio_of_defaults_as_sparse_column", "0") + set_be_config.call("variant_threshold_rows_to_estimate_sparse_column", "0") + def table_name = "var_with_index" + sql "DROP TABLE IF EXISTS var_with_index" + sql """ + CREATE TABLE IF NOT EXISTS var_with_index ( + k bigint, + v variant, + inv string, + INDEX idx(inv) USING INVERTED PROPERTIES("parser"="standard") COMMENT '' + ) + DUPLICATE KEY(`k`) + DISTRIBUTED BY HASH(k) BUCKETS 3 + properties("replication_num" = "1", "disable_auto_compaction" = "true"); + """ + sql """insert into var_with_index values(1, '{"a" : 0, "b": 3}', 'hello world'), (2, '{"a" : 123}', 'world'),(3, '{"a" : 123}', 'hello world')""" + qt_sql_inv_1 "select v:a from var_with_index where inv match 'hello' order by k" + qt_sql_inv_2 "select v:a from var_with_index 
where inv match 'hello' and cast(v:a as int) > 0 order by k" + qt_sql_inv_3 "select * from var_with_index where inv match 'hello' and cast(v:a as int) > 0 order by k" + sql "truncate table var_with_index" + // set back configs + set_be_config.call("variant_ratio_of_defaults_as_sparse_column", "0.95") + set_be_config.call("variant_threshold_rows_to_estimate_sparse_column", "100") + // sql "truncate table ${table_name}" + sql """insert into var_with_index values(1, '{"a1" : 0, "b1": 3}', 'hello world'), (2, '{"a2" : 123}', 'world'),(3, '{"a3" : 123}', 'hello world')""" + sql """insert into var_with_index values(4, '{"b1" : 0, "b2": 3}', 'hello world'), (5, '{"b2" : 123}', 'world'),(6, '{"b3" : 123}', 'hello world')""" + def drop_result = sql """ + ALTER TABLE var_with_index + drop index idx + """ + logger.info("drop index " + "${table_name}" + "; result: " + drop_result) + def timeout = 60000 + wait_for_latest_op_on_table_finish(table_name, timeout) + show_result = sql "show index from ${table_name}" + assertEquals(show_result.size(), 0) + qt_sql_inv4 """select v:a1 from ${table_name} where cast(v:a1 as int) = 0""" + qt_sql_inv5 """select * from ${table_name} order by k""" + sql "create index inv_idx on ${table_name}(`inv`) using inverted" + wait_for_latest_op_on_table_finish(table_name, timeout) + show_result = sql "show index from ${table_name}" + assertEquals(show_result.size(), 1) + sql """insert into var_with_index values(7, '{"a1" : 0, "b1": 3}', 'hello world'), (8, '{"a2" : 123}', 'world'),(9, '{"a3" : 123}', 'hello world')""" + qt_sql_inv6 """select * from ${table_name} order by k desc limit 4""" + + sql """insert into var_with_index values(1, '{"a" : 0, "b": 3}', 'hello world'), (2, '{"a" : 123}', 'world'),(3, '{"a" : 123}', 'hello world')""" + + // alter bitmap index + sql "alter table var_with_index add index btm_idx (inv) using bitmap ;" + sql """insert into var_with_index values(1, '{"a" : 0, "b": 3}', 'hello world'), (2, '{"a" : 123}', 'world'),(3, 
'{"a" : 123}', 'hello world')""" + sql "select * from var_with_index order by k limit 4" + wait_for_latest_op_on_table_finish(table_name, timeout) + sql "alter table var_with_index add index btm_idxk (k) using bitmap ;" + sql """insert into var_with_index values(1, '{"a" : 0, "b": 3}', 'hello world'), (2, '{"a" : 123}', 'world'),(3, '{"a" : 123}', 'hello world')""" + sql "select * from var_with_index order by k limit 4" + wait_for_latest_op_on_table_finish(table_name, timeout) +} \ No newline at end of file diff --git a/regression-test/suites/variant_p0/with_index/var_index.groovy b/regression-test/suites/variant_p0/with_index/var_index.groovy new file mode 100644 index 0000000000..844b83e1a2 --- /dev/null +++ b/regression-test/suites/variant_p0/with_index/var_index.groovy @@ -0,0 +1,39 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +suite("regression_test_variant_var_index", "variant_type"){ + def table_name = "var_index" + sql "DROP TABLE IF EXISTS var_index" + sql """ + CREATE TABLE IF NOT EXISTS var_index ( + k bigint, + v variant, + INDEX idx_var(v) USING INVERTED PROPERTIES("parser" = "english") COMMENT '' + ) + DUPLICATE KEY(`k`) + DISTRIBUTED BY HASH(k) BUCKETS 1 + properties("replication_num" = "1", "disable_auto_compaction" = "true"); + """ + + sql """insert into var_index values(1, '{"a" : 123, "b" : "xxxyyy", "c" : 111999111}')""" + sql """insert into var_index values(2, '{"a" : 18811, "b" : "hello world", "c" : 1181111}')""" + sql """insert into var_index values(3, '{"a" : 18811, "b" : "hello wworld", "c" : 11111}')""" + sql """insert into var_index values(4, '{"a" : 1234, "b" : "hello xxx world", "c" : 8181111}')""" + qt_sql """select * from var_index where cast(v:a as smallint) > 123 and cast(v:b as string) match 'hello' and cast(v:c as int) > 1024 order by k""" + sql """insert into var_index values(5, '{"a" : 123456789, "b" : 123456, "c" : 8181111}')""" + qt_sql """select * from var_index where cast(v:a as int) > 123 and cast(v:b as string) match 'hello' and cast(v:c as int) > 11111 order by k""" +} \ No newline at end of file diff --git a/regression-test/suites/variant_p2/github_events_advance.groovy b/regression-test/suites/variant_p2/github_events_advance.groovy new file mode 100644 index 0000000000..48db7a50d0 --- /dev/null +++ b/regression-test/suites/variant_p2/github_events_advance.groovy @@ -0,0 +1,112 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("github_event_advance_p2", "variant_type,p2"){ + + def load_json_data = {table_name, file_name -> + // load the json data + streamLoad { + table "${table_name}" + + // set http request header params + set 'read_json_by_line', 'true' + set 'format', 'json' + set 'max_filter_ratio', '0.1' + file file_name // import json file + time 10000 // limit inflight 10s + + // if declared a check callback, the default check condition will ignore. + // So you must check all condition + + check { result, exception, startTime, endTime -> + if (exception != null) { + throw exception + } + logger.info("Stream load ${file_name} result: ${result}".toString()) + def json = parseJson(result) + assertEquals("success", json.Status.toLowerCase()) + // assertEquals(json.NumberTotalRows, json.NumberLoadedRows + json.NumberUnselectedRows) + assertTrue(json.NumberLoadedRows > 0 && json.LoadBytes > 0) + } + } + } + + def create_table = {table_name, buckets="auto" -> + sql "DROP TABLE IF EXISTS ${table_name}" + sql """ + CREATE TABLE IF NOT EXISTS ${table_name} ( + id BIGINT NOT NULL, + type VARCHAR(30) NULL, + actor VARIANT NULL, + repo VARIANT NULL, + payload VARIANT NULL, + public BOOLEAN NULL, + created_at DATETIME NULL, + org JSON NULL + -- INDEX idx_payload(payload) USING INVERTED PROPERTIES("parser" = "english") COMMENT '', + -- INDEX idx_repo(repo) USING INVERTED PROPERTIES("parser" = "english") COMMENT '', + -- INDEX idx_actor(actor) USING INVERTED PROPERTIES("parser" = "english") COMMENT '' + ) + DUPLICATE KEY(`id`) + DISTRIBUTED BY HASH(id) BUCKETS ${buckets} 
+ properties("replication_num" = "1", "disable_auto_compaction" = "false"); + """ + } + + def set_be_config = { key, value -> + String backend_id; + def backendId_to_backendIP = [:] + def backendId_to_backendHttpPort = [:] + getBackendIpHttpPort(backendId_to_backendIP, backendId_to_backendHttpPort); + + backend_id = backendId_to_backendIP.keySet()[0] + def (code, out, err) = update_be_config(backendId_to_backendIP.get(backend_id), backendId_to_backendHttpPort.get(backend_id), key, value) + logger.info("update config: code=" + code + ", out=" + out + ", err=" + err) + } + + try { + set_be_config.call("ratio_of_defaults_as_sparse_column", "1") + table_name = "github_events" + create_table.call(table_name, 10) + List daysEveryMonth = [31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31] + // 2015 + def year = "2015" + def monthPrefix = "0" + def dayPrefix = "0" + log.info("current year: ${year}") + for (int i = 1; i <= 3; i++) { + def month = i < 10 ? monthPrefix + i.toString() : i.toString() + log.info("current month: ${month}") + for (int j = 1; j <= daysEveryMonth[i - 1]; j++) { + def day = j < 10 ? 
dayPrefix + j.toString() : j.toString() + log.info("current day: ${day}") + for (int z = 1; z < 24; z++) { + def hour = z.toString() + log.info("current hour: ${hour}") + def fileName = year + "-" + month + "-" + day + "-" + hour + ".json" + log.info("cuurent fileName: ${fileName}") + load_json_data.call(table_name, """${getS3Url() + '/regression/github_events_dataset/' + fileName}""") + } + } + } + + qt_sql("select count() from github_events") + } finally { + // reset flags + set_be_config.call("ratio_of_defaults_as_sparse_column", "0.95") + } +} diff --git a/regression-test/suites/variant_p2/sql/authorsWithTheMostPushes.sql b/regression-test/suites/variant_p2/sql/authorsWithTheMostPushes.sql new file mode 100644 index 0000000000..184d7fd6e8 --- /dev/null +++ b/regression-test/suites/variant_p2/sql/authorsWithTheMostPushes.sql @@ -0,0 +1,9 @@ + SELECT + cast(actor:login as string), + count() AS c, + count(distinct cast(repo:name as string)) AS repos + FROM github_events + WHERE type = 'PushEvent' + GROUP BY cast(actor:login as string) + ORDER BY c DESC, 1, 3 + LIMIT 50 diff --git a/regression-test/suites/variant_p2/sql/countingStar1.sql b/regression-test/suites/variant_p2/sql/countingStar1.sql new file mode 100644 index 0000000000..603dadf7fb --- /dev/null +++ b/regression-test/suites/variant_p2/sql/countingStar1.sql @@ -0,0 +1 @@ +SELECT count() FROM github_events WHERE type = 'WatchEvent' diff --git a/regression-test/suites/variant_p2/sql/countingStar2.sql b/regression-test/suites/variant_p2/sql/countingStar2.sql new file mode 100644 index 0000000000..85301adddf --- /dev/null +++ b/regression-test/suites/variant_p2/sql/countingStar2.sql @@ -0,0 +1 @@ +SELECT cast(payload:action as string), count() FROM github_events WHERE type = 'WatchEvent' GROUP BY cast(payload:action as string) \ No newline at end of file diff --git a/regression-test/suites/variant_p2/sql/countingStar3.sql b/regression-test/suites/variant_p2/sql/countingStar3.sql new file mode 100644 index 
0000000000..c1530e6c0a --- /dev/null +++ b/regression-test/suites/variant_p2/sql/countingStar3.sql @@ -0,0 +1 @@ + SELECT count() FROM github_events WHERE type = 'WatchEvent' AND cast(repo:name as string) IN ('apache/spark', 'GunZi200/Memory-Colour', 'isohuntto/openbay', 'wasabeef/awesome-android-ui') GROUP BY cast(payload:action as string) \ No newline at end of file diff --git a/regression-test/suites/variant_p2/sql/distributionOfRepositoriesByStarCount.sql b/regression-test/suites/variant_p2/sql/distributionOfRepositoriesByStarCount.sql new file mode 100644 index 0000000000..3cc0e2ea79 --- /dev/null +++ b/regression-test/suites/variant_p2/sql/distributionOfRepositoriesByStarCount.sql @@ -0,0 +1,14 @@ +SELECT + cast(pow(10, floor(log10(c))) as int) AS stars, + count(distinct k) +FROM +( + SELECT + cast(repo:name as string) as k, + count() AS c + FROM github_events + WHERE type = 'WatchEvent' + GROUP BY cast(repo:name as string) +) t +GROUP BY stars +ORDER BY stars ASC diff --git a/regression-test/suites/variant_p2/sql/githubRoulette.sql b/regression-test/suites/variant_p2/sql/githubRoulette.sql new file mode 100644 index 0000000000..4fc2555cef --- /dev/null +++ b/regression-test/suites/variant_p2/sql/githubRoulette.sql @@ -0,0 +1 @@ +SELECT cast(repo:name as string) FROM github_events WHERE type = 'WatchEvent' ORDER BY created_at, cast(repo:name as string) LIMIT 50 \ No newline at end of file diff --git a/regression-test/suites/variant_p2/sql/howHasTheListOfTopRepositoriesChangedOverTheYears1.sql b/regression-test/suites/variant_p2/sql/howHasTheListOfTopRepositoriesChangedOverTheYears1.sql new file mode 100644 index 0000000000..4f55826ff6 --- /dev/null +++ b/regression-test/suites/variant_p2/sql/howHasTheListOfTopRepositoriesChangedOverTheYears1.sql @@ -0,0 +1 @@ +SELECT cast(repo:name as string), count() AS stars FROM github_events WHERE type = 'WatchEvent' AND year(created_at) = '2015' GROUP BY cast(repo:name as string) ORDER BY stars DESC, 1 LIMIT 50 diff 
--git a/regression-test/suites/variant_p2/sql/howHasTheListOfTopRepositoriesChangedOverTheYears2.sql b/regression-test/suites/variant_p2/sql/howHasTheListOfTopRepositoriesChangedOverTheYears2.sql new file mode 100644 index 0000000000..4f55826ff6 --- /dev/null +++ b/regression-test/suites/variant_p2/sql/howHasTheListOfTopRepositoriesChangedOverTheYears2.sql @@ -0,0 +1 @@ +SELECT cast(repo:name as string), count() AS stars FROM github_events WHERE type = 'WatchEvent' AND year(created_at) = '2015' GROUP BY cast(repo:name as string) ORDER BY stars DESC, 1 LIMIT 50 diff --git a/regression-test/suites/variant_p2/sql/howHasTheListOfTopRepositoriesChangedOverTheYears3.sql b/regression-test/suites/variant_p2/sql/howHasTheListOfTopRepositoriesChangedOverTheYears3.sql new file mode 100644 index 0000000000..076aaf3054 --- /dev/null +++ b/regression-test/suites/variant_p2/sql/howHasTheListOfTopRepositoriesChangedOverTheYears3.sql @@ -0,0 +1 @@ +SELECT cast(repo:name as string), count() AS stars FROM github_events WHERE type = 'WatchEvent' AND year(created_at) = '2015' GROUP BY cast(repo:name as string) ORDER BY stars DESC, cast(repo:name as string) LIMIT 50 diff --git a/regression-test/suites/variant_p2/sql/howHasTheListOfTopRepositoriesChangedOverTheYears4.sql b/regression-test/suites/variant_p2/sql/howHasTheListOfTopRepositoriesChangedOverTheYears4.sql new file mode 100644 index 0000000000..4f55826ff6 --- /dev/null +++ b/regression-test/suites/variant_p2/sql/howHasTheListOfTopRepositoriesChangedOverTheYears4.sql @@ -0,0 +1 @@ +SELECT cast(repo:name as string), count() AS stars FROM github_events WHERE type = 'WatchEvent' AND year(created_at) = '2015' GROUP BY cast(repo:name as string) ORDER BY stars DESC, 1 LIMIT 50 diff --git a/regression-test/suites/variant_p2/sql/howHasTheListOfTopRepositoriesChangedOverTheYears5.sql b/regression-test/suites/variant_p2/sql/howHasTheListOfTopRepositoriesChangedOverTheYears5.sql new file mode 100644 index 0000000000..4f55826ff6 --- /dev/null 
+++ b/regression-test/suites/variant_p2/sql/howHasTheListOfTopRepositoriesChangedOverTheYears5.sql @@ -0,0 +1 @@ +SELECT cast(repo:name as string), count() AS stars FROM github_events WHERE type = 'WatchEvent' AND year(created_at) = '2015' GROUP BY cast(repo:name as string) ORDER BY stars DESC, 1 LIMIT 50 diff --git a/regression-test/suites/variant_p2/sql/howHasTheListOfTopRepositoriesChangedOverTheYears6.sql b/regression-test/suites/variant_p2/sql/howHasTheListOfTopRepositoriesChangedOverTheYears6.sql new file mode 100644 index 0000000000..4f55826ff6 --- /dev/null +++ b/regression-test/suites/variant_p2/sql/howHasTheListOfTopRepositoriesChangedOverTheYears6.sql @@ -0,0 +1 @@ +SELECT cast(repo:name as string), count() AS stars FROM github_events WHERE type = 'WatchEvent' AND year(created_at) = '2015' GROUP BY cast(repo:name as string) ORDER BY stars DESC, 1 LIMIT 50 diff --git a/regression-test/suites/variant_p2/sql/howHasTheListOfTopRepositoriesChangedOverTheYears7.sql b/regression-test/suites/variant_p2/sql/howHasTheListOfTopRepositoriesChangedOverTheYears7.sql new file mode 100644 index 0000000000..836e3f24c1 --- /dev/null +++ b/regression-test/suites/variant_p2/sql/howHasTheListOfTopRepositoriesChangedOverTheYears7.sql @@ -0,0 +1,29 @@ +SELECT + repo_name, + year, + cnt +FROM +( + SELECT + row_number() OVER (PARTITION BY year ORDER BY cnt DESC) AS r, + repo_name, + year, + cnt + FROM + ( + SELECT + lower(cast(repo:name as string)) AS repo_name, + year(created_at) AS year, + count() AS cnt + FROM github_events + WHERE (type = 'WatchEvent') AND (year(created_at) >= 2015) + GROUP BY + repo_name, + year + ) t +) t2 +WHERE r <= 10 +ORDER BY + year ASC, + cnt DESC, + repo_name diff --git a/regression-test/suites/variant_p2/sql/howHasTheTotalNumberOfStarsChangedOverTime.sql b/regression-test/suites/variant_p2/sql/howHasTheTotalNumberOfStarsChangedOverTime.sql new file mode 100644 index 0000000000..c85f96df6f --- /dev/null +++ 
b/regression-test/suites/variant_p2/sql/howHasTheTotalNumberOfStarsChangedOverTime.sql @@ -0,0 +1,2 @@ +SELECT year(created_at) AS year, count() AS stars FROM github_events WHERE type = 'WatchEvent' GROUP BY year ORDER BY year + diff --git a/regression-test/suites/variant_p2/sql/issuesWithTheMostComments1.sql b/regression-test/suites/variant_p2/sql/issuesWithTheMostComments1.sql new file mode 100644 index 0000000000..0a60a5fbb6 --- /dev/null +++ b/regression-test/suites/variant_p2/sql/issuesWithTheMostComments1.sql @@ -0,0 +1 @@ +SELECT count() FROM github_events WHERE type = 'IssueCommentEvent' diff --git a/regression-test/suites/variant_p2/sql/issuesWithTheMostComments2.sql b/regression-test/suites/variant_p2/sql/issuesWithTheMostComments2.sql new file mode 100644 index 0000000000..3a96ad5804 --- /dev/null +++ b/regression-test/suites/variant_p2/sql/issuesWithTheMostComments2.sql @@ -0,0 +1 @@ +SELECT cast(repo:name as string), count() FROM github_events WHERE type = 'IssueCommentEvent' GROUP BY cast(repo:name as string) ORDER BY count() DESC, 1 LIMIT 50 diff --git a/regression-test/suites/variant_p2/sql/issuesWithTheMostComments3.sql b/regression-test/suites/variant_p2/sql/issuesWithTheMostComments3.sql new file mode 100644 index 0000000000..36c76fc9bc --- /dev/null +++ b/regression-test/suites/variant_p2/sql/issuesWithTheMostComments3.sql @@ -0,0 +1,17 @@ +SELECT + repo_name, + comments, + issues, + cast(round(comments / issues, 0) as int) AS ratio +FROM +( + SELECT + cast(repo:name as string) as repo_name, + count() AS comments, + count(distinct cast(payload:issue.`number` as int)) AS issues + FROM github_events + WHERE type = 'IssueCommentEvent' + GROUP BY cast(repo:name as string) +) t +ORDER BY comments DESC, 1, 3, 4 +LIMIT 50 diff --git a/regression-test/suites/variant_p2/sql/issuesWithTheMostComments4.sql b/regression-test/suites/variant_p2/sql/issuesWithTheMostComments4.sql new file mode 100644 index 0000000000..3375d63c66 --- /dev/null +++ 
b/regression-test/suites/variant_p2/sql/issuesWithTheMostComments4.sql @@ -0,0 +1,10 @@ +SELECT + cast(repo:name as string), + cast(payload:issue.`number` as int) as number, + count() AS comments +FROM github_events +WHERE type = 'IssueCommentEvent' AND (cast(payload:action as string) = 'created') +GROUP BY cast(repo:name as string), number +ORDER BY comments DESC, number ASC, 1 +LIMIT 50 + diff --git a/regression-test/suites/variant_p2/sql/issuesWithTheMostComments5.sql b/regression-test/suites/variant_p2/sql/issuesWithTheMostComments5.sql new file mode 100644 index 0000000000..71be66e5d8 --- /dev/null +++ b/regression-test/suites/variant_p2/sql/issuesWithTheMostComments5.sql @@ -0,0 +1,9 @@ +SELECT + cast(repo:name as string), + cast(payload:issue.number as int) as number, + count() AS comments +FROM github_events +WHERE type = 'IssueCommentEvent' AND (cast(payload:action as string) = 'created') AND (cast(payload:issue.number as int) > 10) +GROUP BY cast(repo:name as string), number +ORDER BY comments DESC, cast(repo:name as string), number +LIMIT 50 diff --git a/regression-test/suites/variant_p2/sql/issuesWithTheMostComments6.sql b/regression-test/suites/variant_p2/sql/issuesWithTheMostComments6.sql new file mode 100644 index 0000000000..80503c1e1d --- /dev/null +++ b/regression-test/suites/variant_p2/sql/issuesWithTheMostComments6.sql @@ -0,0 +1,11 @@ +SELECT + cast(repo:name as string), + cast(payload:issue.`number` as int) as number, + count() AS comments, + count(distinct cast(actor:login as string)) AS authors +FROM github_events +WHERE type = 'IssueCommentEvent' AND (cast(payload:action as string) = 'created') AND (cast(payload:issue.`number` as int) > 10) +GROUP BY cast(repo:name as string), number +HAVING authors >= 4 +ORDER BY comments DESC, number +LIMIT 50 diff --git a/regression-test/suites/variant_p2/sql/issuesWithTheMostComments7.sql b/regression-test/suites/variant_p2/sql/issuesWithTheMostComments7.sql new file mode 100644 index 
0000000000..cd0a367522 --- /dev/null +++ b/regression-test/suites/variant_p2/sql/issuesWithTheMostComments7.sql @@ -0,0 +1,9 @@ +SELECT + cast(repo:name as string), + count() AS comments, + count(distinct cast(actor:login as string)) AS authors +FROM github_events +WHERE type = 'CommitCommentEvent' +GROUP BY cast(repo:name as string) +ORDER BY count() DESC, 1, 3 +LIMIT 50 diff --git a/regression-test/suites/variant_p2/sql/issuesWithTheMostComments8.sql b/regression-test/suites/variant_p2/sql/issuesWithTheMostComments8.sql new file mode 100644 index 0000000000..3b26dd4b6d --- /dev/null +++ b/regression-test/suites/variant_p2/sql/issuesWithTheMostComments8.sql @@ -0,0 +1,13 @@ +SELECT + concat('https://github.com/', cast(repo:name as string), '/commit/', cast(payload:commit_id as string)) URL, + cast(payload:commit_id as string) AS commit_id, + count() AS comments, + count(distinct cast(actor:login as string)) AS authors +FROM github_events +WHERE (type = 'CommitCommentEvent') AND cast(payload:commit_id as string) != "" +GROUP BY + cast(repo:name as string), + commit_id +HAVING authors >= 10 +ORDER BY count() DESC, URL, authors +LIMIT 50 diff --git a/regression-test/suites/variant_p2/sql/mostForkedRepositories.sql b/regression-test/suites/variant_p2/sql/mostForkedRepositories.sql new file mode 100644 index 0000000000..6dd01ef3fb --- /dev/null +++ b/regression-test/suites/variant_p2/sql/mostForkedRepositories.sql @@ -0,0 +1 @@ +SELECT cast(repo:name as string), count() AS forks FROM github_events WHERE type = 'ForkEvent' GROUP BY cast(repo:name as string) ORDER BY forks DESC, 1 LIMIT 50 diff --git a/regression-test/suites/variant_p2/sql/mostPopularCommentsOnGithub.sql b/regression-test/suites/variant_p2/sql/mostPopularCommentsOnGithub.sql new file mode 100644 index 0000000000..2f68bea3d0 --- /dev/null +++ b/regression-test/suites/variant_p2/sql/mostPopularCommentsOnGithub.sql @@ -0,0 +1 @@ +SELECT cast(payload:comment.body as string) as comment, count() FROM 
github_events WHERE cast(payload:comment.body as string) != "" AND length(cast(payload:comment.body as string)) < 100 GROUP BY comment ORDER BY count() DESC, comment, 1 LIMIT 1 \ No newline at end of file diff --git a/regression-test/suites/variant_p2/sql/organizationsByTheNumberOfRepositories.sql b/regression-test/suites/variant_p2/sql/organizationsByTheNumberOfRepositories.sql new file mode 100644 index 0000000000..f7411c1e9f --- /dev/null +++ b/regression-test/suites/variant_p2/sql/organizationsByTheNumberOfRepositories.sql @@ -0,0 +1,14 @@ +SELECT + lower(split_part(repo_name, '/', 1)) AS org, + count(distinct repo_name) AS repos +FROM +( + SELECT cast(repo:name as string) as repo_name + FROM github_events + WHERE type = 'WatchEvent' + GROUP BY cast(repo:name as string) + HAVING count() >= 10 +) t +GROUP BY org +ORDER BY repos DESC, org ASC +LIMIT 50 diff --git a/regression-test/suites/variant_p2/sql/organizationsByTheNumberOfStars.sql b/regression-test/suites/variant_p2/sql/organizationsByTheNumberOfStars.sql new file mode 100644 index 0000000000..e6c1dd9e44 --- /dev/null +++ b/regression-test/suites/variant_p2/sql/organizationsByTheNumberOfStars.sql @@ -0,0 +1,12 @@ +SELECT + lower(split_part(repo_name, '/', 1)) AS org, + count() AS stars +FROM ( + SELECT cast(repo:name as string) as repo_name + FROM github_events + WHERE type = 'WatchEvent' +) t +GROUP BY org +ORDER BY stars DESC, 1 +LIMIT 50 + diff --git a/regression-test/suites/variant_p2/sql/organizationsByTheSizeOfCommunity.sql b/regression-test/suites/variant_p2/sql/organizationsByTheSizeOfCommunity.sql new file mode 100644 index 0000000000..f7f8d6bcde --- /dev/null +++ b/regression-test/suites/variant_p2/sql/organizationsByTheSizeOfCommunity.sql @@ -0,0 +1,24 @@ +SELECT + lower(split_part(repo_name, '/', 1)) AS org, + count(distinct actor_login) AS authors, + count(distinct pr_author) AS pr_authors, + count(distinct issue_author) AS issue_authors, + count(distinct comment_author) AS comment_authors, + 
count(distinct review_author) AS review_authors, + count(distinct push_author) AS push_authors +FROM +( +SELECT + cast(repo:name as string) as repo_name, + cast(actor:login as string) as actor_login, + CASE WHEN type = 'PullRequestEvent' THEN cast(actor:login as string) ELSE NULL END pr_author, + CASE WHEN type = 'IssuesEvent' THEN cast(actor:login as string) ELSE NULL END issue_author, + CASE WHEN type = 'IssueCommentEvent' THEN cast(actor:login as string) ELSE NULL END comment_author, + CASE WHEN type = 'PullRequestReviewCommentEvent' THEN cast(actor:login as string) ELSE NULL END review_author, + CASE WHEN type = 'PushEvent' THEN cast(actor:login as string) ELSE NULL END push_author +FROM github_events +WHERE type IN ('PullRequestEvent', 'IssuesEvent', 'IssueCommentEvent', 'PullRequestReviewCommentEvent', 'PushEvent') +) t +GROUP BY org +ORDER BY authors DESC, org +limit 5 diff --git a/regression-test/suites/variant_p2/sql/proportionsBetweenStarsAndForks1.sql b/regression-test/suites/variant_p2/sql/proportionsBetweenStarsAndForks1.sql new file mode 100644 index 0000000000..738cc9a962 --- /dev/null +++ b/regression-test/suites/variant_p2/sql/proportionsBetweenStarsAndForks1.sql @@ -0,0 +1,17 @@ +SELECT + repo_name, + sum(fork) AS forks, + sum(star) AS stars, + cast(round(sum(star) / sum(fork), 0) as int) AS ratio +FROM +( + SELECT + cast(repo:name as string) as repo_name, + CASE WHEN type = 'ForkEvent' THEN 1 ELSE 0 END AS fork, + CASE WHEN type = 'WatchEvent' THEN 1 ELSE 0 END AS star + FROM github_events + WHERE type IN ('ForkEvent', 'WatchEvent') +) t +GROUP BY repo_name +ORDER BY forks DESC, 1, 3, 4 +LIMIT 50 diff --git a/regression-test/suites/variant_p2/sql/proportionsBetweenStarsAndForks2.sql b/regression-test/suites/variant_p2/sql/proportionsBetweenStarsAndForks2.sql new file mode 100644 index 0000000000..0d98ee9ce6 --- /dev/null +++ b/regression-test/suites/variant_p2/sql/proportionsBetweenStarsAndForks2.sql @@ -0,0 +1,18 @@ +SELECT + repo_name, + 
sum(fork) AS forks, + sum(star) AS stars, + cast(round(sum(star) / sum(fork), 3) as int) AS ratio +FROM +( + SELECT + cast(repo:name as string) as repo_name, + CASE WHEN type = 'ForkEvent' THEN 1 ELSE 0 END AS fork, + CASE WHEN type = 'WatchEvent' THEN 1 ELSE 0 END AS star + FROM github_events + WHERE type IN ('ForkEvent', 'WatchEvent') +) t +GROUP BY repo_name +HAVING (stars > 20) AND (forks >= 10) +ORDER BY ratio DESC, repo_name +LIMIT 50 diff --git a/regression-test/suites/variant_p2/sql/proportionsBetweenStarsAndForks3.sql b/regression-test/suites/variant_p2/sql/proportionsBetweenStarsAndForks3.sql new file mode 100644 index 0000000000..abd98d8f08 --- /dev/null +++ b/regression-test/suites/variant_p2/sql/proportionsBetweenStarsAndForks3.sql @@ -0,0 +1,18 @@ +SELECT + repo_name, + sum(fork) AS forks, + sum(star) AS stars, + cast(round(sum(fork) / sum(star), 2) as int) AS ratio +FROM +( + SELECT + cast(repo:name as string) as repo_name, + CASE WHEN type = 'ForkEvent' THEN 1 ELSE 0 END AS fork, + CASE WHEN type = 'WatchEvent' THEN 1 ELSE 0 END AS star + FROM github_events + WHERE type IN ('ForkEvent', 'WatchEvent') +) t +GROUP BY repo_name +HAVING (stars > 4) AND (forks > 4) +ORDER BY ratio, repo_name DESC +LIMIT 50 diff --git a/regression-test/suites/variant_p2/sql/proportionsBetweenStarsAndForks4.sql b/regression-test/suites/variant_p2/sql/proportionsBetweenStarsAndForks4.sql new file mode 100644 index 0000000000..b3c20be7e8 --- /dev/null +++ b/regression-test/suites/variant_p2/sql/proportionsBetweenStarsAndForks4.sql @@ -0,0 +1,13 @@ +SELECT + sum(fork) AS forks, + sum(star) AS stars, + round(sum(star) / sum(fork), 2) AS ratio +FROM +( + SELECT + cast(repo:name as string), + CASE WHEN type = 'ForkEvent' THEN 1 ELSE 0 END AS fork, + CASE WHEN type = 'WatchEvent' THEN 1 ELSE 0 END AS star + FROM github_events + WHERE type IN ('ForkEvent', 'WatchEvent') +) t diff --git a/regression-test/suites/variant_p2/sql/proportionsBetweenStarsAndForks5.sql 
b/regression-test/suites/variant_p2/sql/proportionsBetweenStarsAndForks5.sql new file mode 100644 index 0000000000..8eabb99d93 --- /dev/null +++ b/regression-test/suites/variant_p2/sql/proportionsBetweenStarsAndForks5.sql @@ -0,0 +1,21 @@ +SELECT + sum(forks) AS forks, + sum(stars) AS stars, + round(sum(stars) / sum(forks), 2) AS ratio +FROM +( + SELECT + sum(fork) AS forks, + sum(star) AS stars + FROM + ( + SELECT + cast(repo:name as string) as repo_name, + CASE WHEN type = 'ForkEvent' THEN 1 ELSE 0 END AS fork, + CASE WHEN type = 'WatchEvent' THEN 1 ELSE 0 END AS star + FROM github_events + WHERE type IN ('ForkEvent', 'WatchEvent') + ) t + GROUP BY repo_name + HAVING stars > 10 +) t2 diff --git a/regression-test/suites/variant_p2/sql/repositoriesWithClickhouse_related_comments1.sql b/regression-test/suites/variant_p2/sql/repositoriesWithClickhouse_related_comments1.sql new file mode 100644 index 0000000000..6ad7bf32b1 --- /dev/null +++ b/regression-test/suites/variant_p2/sql/repositoriesWithClickhouse_related_comments1.sql @@ -0,0 +1 @@ +SELECT cast(repo:name as string), count() FROM github_events WHERE lower(cast(payload:comment.body as string)) LIKE '%apache%' GROUP BY cast(repo:name as string) ORDER BY count() DESC, cast(repo:name as string) ASC LIMIT 50 \ No newline at end of file diff --git a/regression-test/suites/variant_p2/sql/repositoriesWithClickhouse_related_comments2.sql b/regression-test/suites/variant_p2/sql/repositoriesWithClickhouse_related_comments2.sql new file mode 100644 index 0000000000..991b6367ce --- /dev/null +++ b/regression-test/suites/variant_p2/sql/repositoriesWithClickhouse_related_comments2.sql @@ -0,0 +1,17 @@ +SELECT + repo_name, + sum(num_star) AS num_stars, + sum(num_comment) AS num_comments +FROM +( + SELECT + cast(repo:name as string) as repo_name, + CASE WHEN type = 'WatchEvent' THEN 1 ELSE 0 END AS num_star, + CASE WHEN lower(cast(payload:comment.body as string)) LIKE '%apache%' THEN 1 ELSE 0 END AS num_comment + FROM 
github_events + WHERE (lower(cast(payload:comment.body as string)) LIKE '%apache%') OR (type = 'WatchEvent') +) t +GROUP BY repo_name +HAVING num_comments > 0 +ORDER BY num_stars DESC,num_comments DESC, repo_name ASC +LIMIT 50 diff --git a/regression-test/suites/variant_p2/sql/repositoriesWithDoris_related_comments1.sql b/regression-test/suites/variant_p2/sql/repositoriesWithDoris_related_comments1.sql new file mode 100644 index 0000000000..c7a6ed5898 --- /dev/null +++ b/regression-test/suites/variant_p2/sql/repositoriesWithDoris_related_comments1.sql @@ -0,0 +1 @@ +SELECT cast(repo:name as string), count() FROM github_events WHERE lower(cast(payload:comment.body as string)) LIKE '%spark%' GROUP BY cast(repo:name as string) ORDER BY count() DESC, cast(repo:name as string) ASC LIMIT 50 \ No newline at end of file diff --git a/regression-test/suites/variant_p2/sql/repositoriesWithDoris_related_comments2.sql b/regression-test/suites/variant_p2/sql/repositoriesWithDoris_related_comments2.sql new file mode 100644 index 0000000000..baeea16eb2 --- /dev/null +++ b/regression-test/suites/variant_p2/sql/repositoriesWithDoris_related_comments2.sql @@ -0,0 +1,17 @@ +SELECT + repo_name, + sum(num_star) AS num_stars, + sum(num_comment) AS num_comments +FROM +( + SELECT + cast(repo:name as string) as repo_name, + CASE WHEN type = 'WatchEvent' THEN 1 ELSE 0 END AS num_star, + CASE WHEN lower(cast(payload:comment.body as string)) LIKE '%spark%' THEN 1 ELSE 0 END AS num_comment + FROM github_events + WHERE (lower(cast(payload:comment.body as string)) LIKE '%spark%') OR (type = 'WatchEvent') +) t +GROUP BY repo_name +HAVING num_comments > 0 +ORDER BY num_stars DESC,num_comments DESC,repo_name ASC +LIMIT 50 diff --git a/regression-test/suites/variant_p2/sql/repositoriesWithTheHighestGrowthYoY.sql b/regression-test/suites/variant_p2/sql/repositoriesWithTheHighestGrowthYoY.sql new file mode 100644 index 0000000000..b490a1b2c9 --- /dev/null +++ 
b/regression-test/suites/variant_p2/sql/repositoriesWithTheHighestGrowthYoY.sql @@ -0,0 +1,19 @@ +SELECT + repo_name, + sum(created_at_2022) AS stars2022, + sum(created_at_2015) AS stars2015, + cast(round(sum(created_at_2022) / sum(created_at_2015), 0) as int) AS yoy +FROM +( + SELECT + cast(repo:name as string) as repo_name, + CASE year(created_at) WHEN 2022 THEN 1 ELSE 0 END AS created_at_2022, + CASE year(created_at) WHEN 2015 THEN 1 ELSE 0 END AS created_at_2015, + created_at as created_at + FROM github_events + WHERE type = 'WatchEvent' +) t +GROUP BY repo_name +HAVING (min(created_at) <= '2023-01-01 00:00:00') AND ((stars2022 >= 1) or (stars2015 >= 1)) +ORDER BY yoy DESC, repo_name +LIMIT 50 diff --git a/regression-test/suites/variant_p2/sql/repositoriesWithTheMaximumAmountOfIssues1.sql b/regression-test/suites/variant_p2/sql/repositoriesWithTheMaximumAmountOfIssues1.sql new file mode 100644 index 0000000000..798bde5e95 --- /dev/null +++ b/regression-test/suites/variant_p2/sql/repositoriesWithTheMaximumAmountOfIssues1.sql @@ -0,0 +1 @@ +SELECT cast(repo:name as string), count() AS c, count(distinct cast(actor:login as string)) AS u FROM github_events WHERE type = 'IssuesEvent' AND cast(payload:action as string) = 'opened' GROUP BY cast(repo:name as string) ORDER BY c DESC, cast(repo:name as string) LIMIT 50 diff --git a/regression-test/suites/variant_p2/sql/repositoriesWithTheMaximumAmountOfIssues2.sql b/regression-test/suites/variant_p2/sql/repositoriesWithTheMaximumAmountOfIssues2.sql new file mode 100644 index 0000000000..0abfec6ca8 --- /dev/null +++ b/regression-test/suites/variant_p2/sql/repositoriesWithTheMaximumAmountOfIssues2.sql @@ -0,0 +1,18 @@ +SELECT + repo_name, + sum(issue_created) AS c, + count(distinct actor_login) AS u, + sum(star) AS stars +FROM +( + SELECT + cast(repo:name as string) as repo_name, + CASE WHEN (type = 'IssuesEvent') AND (cast(payload:action as string) = 'opened') THEN 1 ELSE 0 END AS issue_created, + CASE WHEN type = 
'WatchEvent' THEN 1 ELSE 0 END AS star, + CASE WHEN (type = 'IssuesEvent') AND (cast(payload:action as string) = 'opened') THEN cast(actor:login as string) ELSE NULL END AS actor_login + FROM github_events + WHERE type IN ('IssuesEvent', 'WatchEvent') +) t +GROUP BY repo_name +ORDER BY c DESC, repo_name +LIMIT 50 diff --git a/regression-test/suites/variant_p2/sql/repositoriesWithTheMaximumAmountOfIssues3.sql b/regression-test/suites/variant_p2/sql/repositoriesWithTheMaximumAmountOfIssues3.sql new file mode 100644 index 0000000000..6cc8a508c5 --- /dev/null +++ b/regression-test/suites/variant_p2/sql/repositoriesWithTheMaximumAmountOfIssues3.sql @@ -0,0 +1,19 @@ +SELECT + repo_name, + sum(issue_created) AS c, + count(distinct actor_login) AS u, + sum(star) AS stars +FROM +( + SELECT + cast(repo:name as string) as repo_name, + CASE WHEN (type = 'IssuesEvent') AND (cast(payload:action as string) = 'opened') THEN 1 ELSE 0 END AS issue_created, + CASE WHEN type = 'WatchEvent' THEN 1 ELSE 0 END AS star, + CASE WHEN (type = 'IssuesEvent') AND (cast(payload:action as string) = 'opened') THEN cast(actor:login as string) ELSE NULL END AS actor_login + FROM github_events + WHERE type IN ('IssuesEvent', 'WatchEvent') +) t +GROUP BY repo_name +HAVING stars >= 10 +ORDER BY c, u, stars DESC, repo_name +LIMIT 50 diff --git a/regression-test/suites/variant_p2/sql/repositoriesWithTheMaximumAmountOfIssues4.sql b/regression-test/suites/variant_p2/sql/repositoriesWithTheMaximumAmountOfIssues4.sql new file mode 100644 index 0000000000..3f85d7db48 --- /dev/null +++ b/regression-test/suites/variant_p2/sql/repositoriesWithTheMaximumAmountOfIssues4.sql @@ -0,0 +1,18 @@ +SELECT + repo_name, + sum(issue_created) AS c, + count(distinct actor_login) AS u, + sum(star) AS stars +FROM +( + SELECT + cast(repo:name as string) as repo_name, + CASE WHEN (type = 'IssuesEvent') AND (cast(payload:action as string) = 'opened') THEN 1 ELSE 0 END AS issue_created, + CASE WHEN type = 'WatchEvent' THEN 1 ELSE 
0 END AS star, + CASE WHEN (type = 'IssuesEvent') AND (cast(payload:action as string) = 'opened') THEN cast(actor:login as string) ELSE NULL END AS actor_login + FROM github_events + WHERE type IN ('IssuesEvent', 'WatchEvent') +) t +GROUP BY repo_name +ORDER BY u, c, stars DESC, 1 +LIMIT 50 diff --git a/regression-test/suites/variant_p2/sql/repositoriesWithTheMaximumAmountOfPullRequests1.sql b/regression-test/suites/variant_p2/sql/repositoriesWithTheMaximumAmountOfPullRequests1.sql new file mode 100644 index 0000000000..233048b6a4 --- /dev/null +++ b/regression-test/suites/variant_p2/sql/repositoriesWithTheMaximumAmountOfPullRequests1.sql @@ -0,0 +1 @@ +SELECT cast(repo:name as string) as repo_name, count(), count(distinct cast(actor:login as string)) FROM github_events WHERE type = 'PullRequestEvent' AND cast(payload:action as string) = 'opened' GROUP BY cast(repo:name as string) ORDER BY 2,1,3 DESC LIMIT 50 \ No newline at end of file diff --git a/regression-test/suites/variant_p2/sql/repositoriesWithTheMaximumAmountOfPullRequests2.sql b/regression-test/suites/variant_p2/sql/repositoriesWithTheMaximumAmountOfPullRequests2.sql new file mode 100644 index 0000000000..7ec7474191 --- /dev/null +++ b/regression-test/suites/variant_p2/sql/repositoriesWithTheMaximumAmountOfPullRequests2.sql @@ -0,0 +1 @@ +SELECT cast(repo:name as string), count(), count(distinct cast(actor:login as string)) AS u FROM github_events WHERE type = 'PullRequestEvent' AND cast(payload:action as string) = 'opened' GROUP BY cast(repo:name as string) ORDER BY u DESC, 2 DESC, 1 LIMIT 50 diff --git a/regression-test/suites/variant_p2/sql/repositoriesWithTheMaximumNumberOfAcceptedInvitations.sql b/regression-test/suites/variant_p2/sql/repositoriesWithTheMaximumNumberOfAcceptedInvitations.sql new file mode 100644 index 0000000000..1a899f6a83 --- /dev/null +++ b/regression-test/suites/variant_p2/sql/repositoriesWithTheMaximumNumberOfAcceptedInvitations.sql @@ -0,0 +1,17 @@ +SELECT + repo_name, + 
sum(invitation) AS invitations, + sum(star) AS stars +FROM +( + SELECT + cast(repo:name as string) as repo_name, + CASE WHEN type = 'MemberEvent' THEN 1 ELSE 0 END AS invitation, + CASE WHEN type = 'WatchEvent' THEN 1 ELSE 0 END AS star + FROM github_events + WHERE type IN ('MemberEvent', 'WatchEvent') +) t +GROUP BY repo_name +HAVING stars >= 2 +ORDER BY invitations DESC, stars DESC, repo_name +LIMIT 50 diff --git a/regression-test/suites/variant_p2/sql/repositoriesWithTheMostPeopleWhoHavePushAccess1.sql b/regression-test/suites/variant_p2/sql/repositoriesWithTheMostPeopleWhoHavePushAccess1.sql new file mode 100644 index 0000000000..6419fff814 --- /dev/null +++ b/regression-test/suites/variant_p2/sql/repositoriesWithTheMostPeopleWhoHavePushAccess1.sql @@ -0,0 +1,13 @@ +SELECT + repo_name, + count(distinct actor_login) AS u, + sum(star) AS stars +FROM +( + SELECT + lower(cast(repo:name as string)) as repo_name, + CASE WHEN type = 'PushEvent' THEN cast(actor:login as string) ELSE NULL END AS actor_login, + CASE WHEN type = 'WatchEvent' THEN 1 ELSE 0 END AS star + FROM github_events WHERE type IN ('PushEvent', 'WatchEvent') AND cast(repo:name as string) != '/' +) t +GROUP BY repo_name ORDER BY u, stars, repo_name DESC LIMIT 50 diff --git a/regression-test/suites/variant_p2/sql/repositoriesWithTheMostPeopleWhoHavePushAccess2.sql b/regression-test/suites/variant_p2/sql/repositoriesWithTheMostPeopleWhoHavePushAccess2.sql new file mode 100644 index 0000000000..b79bb47a71 --- /dev/null +++ b/regression-test/suites/variant_p2/sql/repositoriesWithTheMostPeopleWhoHavePushAccess2.sql @@ -0,0 +1,13 @@ +SELECT + repo_name, + count(distinct actor_login) AS u, + sum(star) AS stars +FROM +( + SELECT + cast(repo:name as string) as repo_name, + CASE WHEN type = 'PushEvent' AND (cast(payload:ref as string) LIKE '%/master' OR cast(payload:ref as string) LIKE '%/main') THEN cast(actor:login as string) ELSE NULL END AS actor_login, + CASE WHEN type = 'WatchEvent' THEN 1 ELSE 0 END AS 
star + FROM github_events WHERE type IN ('PushEvent', 'WatchEvent') AND cast(repo:name as string) != '/' +) t +GROUP BY repo_name ORDER BY u, repo_name DESC LIMIT 50 diff --git a/regression-test/suites/variant_p2/sql/repositoriesWithTheMostPeopleWhoHavePushAccess3.sql b/regression-test/suites/variant_p2/sql/repositoriesWithTheMostPeopleWhoHavePushAccess3.sql new file mode 100644 index 0000000000..3a4747488f --- /dev/null +++ b/regression-test/suites/variant_p2/sql/repositoriesWithTheMostPeopleWhoHavePushAccess3.sql @@ -0,0 +1,16 @@ +SELECT + repo_name, + count(distinct actor_login) AS u, + sum(star) AS stars +FROM +( + SELECT + cast(repo:name as string) as repo_name, + CASE WHEN type = 'PushEvent' AND (cast(payload:ref as string) LIKE '%/master' OR cast(payload:ref as string) LIKE '%/main') THEN cast(actor:login as string) ELSE NULL END AS actor_login, + CASE WHEN type = 'WatchEvent' THEN 1 ELSE 0 END AS star + FROM github_events WHERE type IN ('PushEvent', 'WatchEvent') AND cast(repo:name as string) != '/' +) t +GROUP BY repo_name +HAVING stars >= 100 +ORDER BY u DESC, repo_name +LIMIT 50; diff --git a/regression-test/suites/variant_p2/sql/repositoriesWithTheMostStarsOverOneDay1.sql b/regression-test/suites/variant_p2/sql/repositoriesWithTheMostStarsOverOneDay1.sql new file mode 100644 index 0000000000..07f434c0dd --- /dev/null +++ b/regression-test/suites/variant_p2/sql/repositoriesWithTheMostStarsOverOneDay1.sql @@ -0,0 +1,22 @@ +SELECT + repo_name, + stars +FROM +( + SELECT + row_number() OVER (PARTITION BY repo_name ORDER BY stars DESC) AS rank, + repo_name, + stars + FROM + ( + SELECT + cast(repo:name as string) as repo_name, + count() AS stars + FROM github_events + WHERE type = 'WatchEvent' + GROUP BY cast(repo:name as string) + ) t1 +) t2 +WHERE rank = 1 +ORDER BY stars DESC, repo_name, 1 +LIMIT 50 diff --git a/regression-test/suites/variant_p2/sql/repositoriesWithTheMostStarsOverOneDay2.sql 
b/regression-test/suites/variant_p2/sql/repositoriesWithTheMostStarsOverOneDay2.sql new file mode 100644 index 0000000000..71c7aaa529 --- /dev/null +++ b/regression-test/suites/variant_p2/sql/repositoriesWithTheMostStarsOverOneDay2.sql @@ -0,0 +1,22 @@ +SELECT + repo_name, + stars +FROM +( + SELECT + row_number() OVER (PARTITION BY repo_name ORDER BY stars DESC) AS rank, + repo_name, + stars + FROM + ( + SELECT + cast(repo:name as string) as repo_name, + count() AS stars + FROM github_events + WHERE type = 'WatchEvent' + GROUP BY cast(repo:name as string) + ) t1 +) t2 +WHERE rank = 1 +ORDER BY stars DESC, repo_name +LIMIT 50 diff --git a/regression-test/suites/variant_p2/sql/repositoriesWithTheMostStarsOverOneDay3.sql b/regression-test/suites/variant_p2/sql/repositoriesWithTheMostStarsOverOneDay3.sql new file mode 100644 index 0000000000..9a95075695 --- /dev/null +++ b/regression-test/suites/variant_p2/sql/repositoriesWithTheMostStarsOverOneDay3.sql @@ -0,0 +1 @@ +SELECT cast(repo:name as string) as repo_name, count() AS stars FROM github_events WHERE type = 'WatchEvent' GROUP BY cast(repo:name as string) ORDER BY stars DESC, repo_name LIMIT 50 diff --git a/regression-test/suites/variant_p2/sql/repositoriesWithTheMostSteadyGrowthOverTime.sql b/regression-test/suites/variant_p2/sql/repositoriesWithTheMostSteadyGrowthOverTime.sql new file mode 100644 index 0000000000..13cb308f64 --- /dev/null +++ b/regression-test/suites/variant_p2/sql/repositoriesWithTheMostSteadyGrowthOverTime.sql @@ -0,0 +1,18 @@ +SELECT + repo_name, + max(stars) AS daily_stars, + sum(stars) AS total_stars, + cast(round(sum(stars) / max(stars), 0) as int) AS rate +FROM +( + SELECT + cast(repo:name as string) as repo_name, + count() AS stars + FROM github_events + WHERE type = 'WatchEvent' + GROUP BY + repo_name +) t +GROUP BY repo_name +ORDER BY rate DESC, repo_name, 1 +LIMIT 50 diff --git a/regression-test/suites/variant_p2/sql/repositoriesWithTheWorstStagnation_order.sql 
b/regression-test/suites/variant_p2/sql/repositoriesWithTheWorstStagnation_order.sql new file mode 100644 index 0000000000..1c140c4557 --- /dev/null +++ b/regression-test/suites/variant_p2/sql/repositoriesWithTheWorstStagnation_order.sql @@ -0,0 +1,19 @@ +SELECT + repo_name, + sum(created_at_2022) AS stars2022, + sum(created_at_2015) AS stars2015, + cast(round(sum(created_at_2022) / sum(created_at_2015), 0) as int) AS yoy +FROM +( + SELECT + cast(repo:name as string) as repo_name, + CASE year(created_at) WHEN 2022 THEN 1 ELSE 0 END AS created_at_2022, + CASE year(created_at) WHEN 2015 THEN 1 ELSE 0 END AS created_at_2015, + created_at as created_at + FROM github_events + WHERE type = 'WatchEvent' +) t +GROUP BY repo_name +HAVING (min(created_at) <= '2019-01-01 00:00:00') AND ((max(created_at) >= '2020-06-01 00:00:00') OR (stars2015 >= 2)) +ORDER BY yoy, repo_name +LIMIT 50 diff --git a/regression-test/suites/variant_p2/sql/repositoryAffinityList2.sql b/regression-test/suites/variant_p2/sql/repositoryAffinityList2.sql new file mode 100644 index 0000000000..3f0183145f --- /dev/null +++ b/regression-test/suites/variant_p2/sql/repositoryAffinityList2.sql @@ -0,0 +1,23 @@ +SELECT + repo_name, + total_stars, + round(spark_stars / total_stars, 2) AS ratio +FROM +( + SELECT + cast(repo:name as string) as repo_name, + count(distinct cast(actor:login as string)) AS total_stars + FROM github_events + WHERE (type = 'WatchEvent') AND (cast(repo:name as string) NOT IN ('apache/spark')) + GROUP BY repo_name + HAVING total_stars >= 10 +) t1 +JOIN +( + SELECT + count(distinct cast(actor:login as string)) AS spark_stars + FROM github_events + WHERE (type = 'WatchEvent') AND (cast(repo:name as string) IN ('apache/spark')) +) t2 +ORDER BY ratio DESC, repo_name +LIMIT 50 diff --git a/regression-test/suites/variant_p2/sql/sql01.sql b/regression-test/suites/variant_p2/sql/sql01.sql new file mode 100644 index 0000000000..a9245641fa --- /dev/null +++ 
b/regression-test/suites/variant_p2/sql/sql01.sql @@ -0,0 +1 @@ +SELECT payload:commits FROM github_events where cast(payload:push_id as int) = 536740433; diff --git a/regression-test/suites/variant_p2/sql/sql02.sql b/regression-test/suites/variant_p2/sql/sql02.sql new file mode 100644 index 0000000000..5265fa7d80 --- /dev/null +++ b/regression-test/suites/variant_p2/sql/sql02.sql @@ -0,0 +1 @@ +SELECT count(cast(payload:commits as string)) FROM github_events; \ No newline at end of file diff --git a/regression-test/suites/variant_p2/sql/sql03.sql b/regression-test/suites/variant_p2/sql/sql03.sql new file mode 100644 index 0000000000..a69cd107d0 --- /dev/null +++ b/regression-test/suites/variant_p2/sql/sql03.sql @@ -0,0 +1 @@ +SELECT count(cast(payload:commits as string)) FROM github_events WHERE cast(payload:push_id as int) > 100; diff --git a/regression-test/suites/variant_p2/sql/sql04.sql b/regression-test/suites/variant_p2/sql/sql04.sql new file mode 100644 index 0000000000..97c7e0eb7a --- /dev/null +++ b/regression-test/suites/variant_p2/sql/sql04.sql @@ -0,0 +1 @@ +SELECT repo:id, payload:issue FROM github_events WHERE cast(payload:issue.state as string) = "open" order by cast(repo:id as int), id limit 10; diff --git a/regression-test/suites/variant_p2/sql/sql05.sql b/regression-test/suites/variant_p2/sql/sql05.sql new file mode 100644 index 0000000000..c10f5c5940 --- /dev/null +++ b/regression-test/suites/variant_p2/sql/sql05.sql @@ -0,0 +1 @@ +-- SELECT count(cast(payload:issue as string)) FROM github_events; \ No newline at end of file diff --git a/regression-test/suites/variant_p2/sql/sql06.sql b/regression-test/suites/variant_p2/sql/sql06.sql new file mode 100644 index 0000000000..cc95540128 --- /dev/null +++ b/regression-test/suites/variant_p2/sql/sql06.sql @@ -0,0 +1 @@ +SELECT count(cast(payload:issue as string)) FROM github_events where cast(payload:issue.state as string) = "closed"; \ No newline at end of file diff --git 
a/regression-test/suites/variant_p2/sql/sql07.sql b/regression-test/suites/variant_p2/sql/sql07.sql new file mode 100644 index 0000000000..c1e8b15867 --- /dev/null +++ b/regression-test/suites/variant_p2/sql/sql07.sql @@ -0,0 +1,2 @@ +set enable_two_phase_read_opt = false; +SELECT payload:commits FROM github_events order by id limit 10; \ No newline at end of file diff --git a/regression-test/suites/variant_p2/sql/sql08.sql b/regression-test/suites/variant_p2/sql/sql08.sql new file mode 100644 index 0000000000..37e47fb5af --- /dev/null +++ b/regression-test/suites/variant_p2/sql/sql08.sql @@ -0,0 +1 @@ +SELECT payload:issue.user FROM github_events WHERE cast(payload:issue.state as string) = "open" and cast(payload:issue.locked as int) = 0 order by cast(repo:id as int), id limit 10; diff --git a/regression-test/suites/variant_p2/sql/theLongestRepositoryNames1.sql b/regression-test/suites/variant_p2/sql/theLongestRepositoryNames1.sql new file mode 100644 index 0000000000..0b99aac343 --- /dev/null +++ b/regression-test/suites/variant_p2/sql/theLongestRepositoryNames1.sql @@ -0,0 +1 @@ +SELECT count(), cast(repo:name as string) FROM github_events WHERE type = 'WatchEvent' GROUP BY cast(repo:name as string) ORDER BY length(cast(repo:name as string)) DESC, cast(repo:name as string) LIMIT 50 diff --git a/regression-test/suites/variant_p2/sql/theLongestRepositoryNames2.sql b/regression-test/suites/variant_p2/sql/theLongestRepositoryNames2.sql new file mode 100644 index 0000000000..89c9a62955 --- /dev/null +++ b/regression-test/suites/variant_p2/sql/theLongestRepositoryNames2.sql @@ -0,0 +1 @@ +SELECT cast(repo:name as string), count() FROM github_events WHERE type = 'WatchEvent' AND cast(repo:name as string) LIKE '%_/_%' GROUP BY cast(repo:name as string) ORDER BY length(cast(repo:name as string)) ASC, cast(repo:name as string) LIMIT 50 diff --git a/regression-test/suites/variant_p2/sql/theMostToughCodeReviews.sql 
b/regression-test/suites/variant_p2/sql/theMostToughCodeReviews.sql new file mode 100644 index 0000000000..d7449fe8a4 --- /dev/null +++ b/regression-test/suites/variant_p2/sql/theMostToughCodeReviews.sql @@ -0,0 +1,10 @@ +SELECT + concat('https://github.com/', cast(repo:name as string), '/pull/') AS URL, + count(distinct cast(actor:login as string)) AS authors +FROM github_events +WHERE (type = 'PullRequestReviewCommentEvent') AND (cast(payload:action as string) = 'created') +GROUP BY + cast(repo:name as string), + cast(payload:issue.`number` as string) +ORDER BY authors DESC, URL ASC +LIMIT 50 \ No newline at end of file diff --git a/regression-test/suites/variant_p2/sql/theTotalNumberOfRepositoriesOnGithub.sql b/regression-test/suites/variant_p2/sql/theTotalNumberOfRepositoriesOnGithub.sql new file mode 100644 index 0000000000..a67c50432f --- /dev/null +++ b/regression-test/suites/variant_p2/sql/theTotalNumberOfRepositoriesOnGithub.sql @@ -0,0 +1 @@ +SELECT count(distinct cast(repo:name as string)) FROM github_events diff --git a/regression-test/suites/variant_p2/sql/theTotalNumberOfUsersOnGithub1.sql b/regression-test/suites/variant_p2/sql/theTotalNumberOfUsersOnGithub1.sql new file mode 100644 index 0000000000..74135944c3 --- /dev/null +++ b/regression-test/suites/variant_p2/sql/theTotalNumberOfUsersOnGithub1.sql @@ -0,0 +1 @@ +SELECT count(distinct cast(actor:login as string)) FROM github_events diff --git a/regression-test/suites/variant_p2/sql/theTotalNumberOfUsersOnGithub2.sql b/regression-test/suites/variant_p2/sql/theTotalNumberOfUsersOnGithub2.sql new file mode 100644 index 0000000000..b02fd0d60f --- /dev/null +++ b/regression-test/suites/variant_p2/sql/theTotalNumberOfUsersOnGithub2.sql @@ -0,0 +1 @@ +SELECT count(distinct cast(actor:login as string)) FROM github_events WHERE type = 'WatchEvent' diff --git a/regression-test/suites/variant_p2/sql/theTotalNumberOfUsersOnGithub3.sql b/regression-test/suites/variant_p2/sql/theTotalNumberOfUsersOnGithub3.sql 
new file mode 100644 index 0000000000..5874548978 --- /dev/null +++ b/regression-test/suites/variant_p2/sql/theTotalNumberOfUsersOnGithub3.sql @@ -0,0 +1 @@ +SELECT count(distinct cast(actor:login as string)) FROM github_events WHERE type = 'PushEvent' diff --git a/regression-test/suites/variant_p2/sql/theTotalNumberOfUsersOnGithub4.sql b/regression-test/suites/variant_p2/sql/theTotalNumberOfUsersOnGithub4.sql new file mode 100644 index 0000000000..cc870d53aa --- /dev/null +++ b/regression-test/suites/variant_p2/sql/theTotalNumberOfUsersOnGithub4.sql @@ -0,0 +1 @@ +SELECT count(distinct cast(actor:login as string)) FROM github_events WHERE type = 'PullRequestEvent' AND cast(payload:action as string) = 'opened' diff --git a/regression-test/suites/variant_p2/sql/topLabels1.sql b/regression-test/suites/variant_p2/sql/topLabels1.sql new file mode 100644 index 0000000000..c45c3a90f8 --- /dev/null +++ b/regression-test/suites/variant_p2/sql/topLabels1.sql @@ -0,0 +1,10 @@ +-- SELECT +-- id, +-- label, +-- count() AS c +-- FROM github_events +-- LATERAL VIEW explode_split(cast(payload:labels as string), ',') t AS label +-- WHERE (type IN ('IssuesEvent', 'PullRequestEvent', 'IssueCommentEvent')) AND (cast(payload:action as string) IN ('created', 'opened', 'labeled')) +-- GROUP BY label, id +-- ORDER BY c, id DESC +-- LIMIT 50 diff --git a/regression-test/suites/variant_p2/sql/topLabels2.sql b/regression-test/suites/variant_p2/sql/topLabels2.sql new file mode 100644 index 0000000000..99941f2898 --- /dev/null +++ b/regression-test/suites/variant_p2/sql/topLabels2.sql @@ -0,0 +1,9 @@ +-- SELECT +-- label, +-- count() AS c +-- FROM github_events +-- LATERAL VIEW explode_split(cast(payload:labels as string), ',') t AS label +-- WHERE (type IN ('IssuesEvent', 'PullRequestEvent', 'IssueCommentEvent')) AND (cast(payload:action as string) IN ('created', 'opened', 'labeled')) AND ((lower(label) LIKE '%bug%') OR (lower(label) LIKE '%feature%')) +-- GROUP BY label +-- ORDER BY c DESC 
+-- LIMIT 50 diff --git a/regression-test/suites/variant_p2/sql/topLabels3.sql b/regression-test/suites/variant_p2/sql/topLabels3.sql new file mode 100644 index 0000000000..205202e802 --- /dev/null +++ b/regression-test/suites/variant_p2/sql/topLabels3.sql @@ -0,0 +1,14 @@ +-- SELECT +-- sum(bug) AS bugs, +-- sum(feature) AS feature, +-- sum(bug) / sum(feature) AS ratio +-- FROM +-- ( +-- SELECT +-- CASE WHEN lower(cast(payload:labels as string)) LIKE '%bug%' THEN 1 ELSE 0 END AS bug, +-- CASE WHEN lower(cast(payload:labels as string)) LIKE '%feature%' THEN 1 ELSE 0 END AS feature +-- FROM github_events +-- LATERAL VIEW explode_split(cast(payload:labels as string), ',') t AS label +-- WHERE (type IN ('IssuesEvent', 'PullRequestEvent', 'IssueCommentEvent')) AND (cast(payload:action as string) IN ('created', 'opened', 'labeled')) AND ((lower(label) LIKE '%bug%') OR (lower(label) LIKE '%feature%')) +-- ) t +-- LIMIT 50 diff --git a/regression-test/suites/variant_p2/sql/topRepositoriesByStars.sql b/regression-test/suites/variant_p2/sql/topRepositoriesByStars.sql new file mode 100644 index 0000000000..36a87fa709 --- /dev/null +++ b/regression-test/suites/variant_p2/sql/topRepositoriesByStars.sql @@ -0,0 +1 @@ +SELECT cast(repo:name as string), count() AS stars FROM github_events WHERE type = 'WatchEvent' GROUP BY cast(repo:name as string) ORDER BY stars DESC, 1 LIMIT 50 diff --git a/regression-test/suites/variant_p2/sql/whatIsTheBestDayOfTheWeekToCatchAStar.sql b/regression-test/suites/variant_p2/sql/whatIsTheBestDayOfTheWeekToCatchAStar.sql new file mode 100644 index 0000000000..a742c30dc5 --- /dev/null +++ b/regression-test/suites/variant_p2/sql/whatIsTheBestDayOfTheWeekToCatchAStar.sql @@ -0,0 +1 @@ +SELECT dayofweek(created_at) AS day, count() AS stars FROM github_events WHERE type = 'WatchEvent' GROUP BY day ORDER BY day diff --git a/regression-test/suites/variant_p2/sql/whoAreAllThosePeopleGivingStars1.sql 
b/regression-test/suites/variant_p2/sql/whoAreAllThosePeopleGivingStars1.sql new file mode 100644 index 0000000000..cb163ad2cc --- /dev/null +++ b/regression-test/suites/variant_p2/sql/whoAreAllThosePeopleGivingStars1.sql @@ -0,0 +1 @@ +SELECT cast(actor:login as string), count() AS stars FROM github_events WHERE type = 'WatchEvent' GROUP BY cast(actor:login as string) ORDER BY stars DESC, 1 LIMIT 50 diff --git a/regression-test/suites/variant_p2/sql/whoAreAllThosePeopleGivingStars2.sql b/regression-test/suites/variant_p2/sql/whoAreAllThosePeopleGivingStars2.sql new file mode 100644 index 0000000000..a46da4812e --- /dev/null +++ b/regression-test/suites/variant_p2/sql/whoAreAllThosePeopleGivingStars2.sql @@ -0,0 +1 @@ +SELECT cast(actor:login as string), count() AS stars FROM github_events WHERE type = 'WatchEvent' AND cast(actor:login as string) = 'cliffordfajardo' GROUP BY cast(actor:login as string) ORDER BY stars DESC LIMIT 50 diff --git a/regression-test/suites/variant_p2/unresovled_sql/affinityByIssuesAndPRs1.sql b/regression-test/suites/variant_p2/unresovled_sql/affinityByIssuesAndPRs1.sql new file mode 100644 index 0000000000..3a2009a53b --- /dev/null +++ b/regression-test/suites/variant_p2/unresovled_sql/affinityByIssuesAndPRs1.sql @@ -0,0 +1,16 @@ +-- ERROR: unmatched column +-- SELECT +-- cast(repo:name as string), +-- count() AS prs, +-- count(distinct cast(actor:login as string)) AS authors +-- FROM github_events +-- WHERE (type = 'PullRequestEvent') AND (cast(payload:action as string) = 'opened') AND (cast(actor:login as string) IN +-- ( +-- SELECT cast(actor:login as string) +-- FROM github_events +-- WHERE (type = 'PullRequestEvent') AND (cast(payload:action as string)= 'opened') AND (cast(repo:name as string) IN ('rspec/rspec-core', 'golden-warning/giraffedraft-server', 'apache/spark')) +-- )) AND (lower(cast(repo:name as string)) NOT LIKE '%clickhouse%') +-- GROUP BY cast(repo:name as string) +-- ORDER BY authors DESC, prs DESC, 
length(cast(repo:name as string)) DESC +-- LIMIT 50 +-- \ No newline at end of file diff --git a/regression-test/suites/variant_p2/unresovled_sql/affinityByIssuesAndPRs2.sql b/regression-test/suites/variant_p2/unresovled_sql/affinityByIssuesAndPRs2.sql new file mode 100644 index 0000000000..5d2f275313 --- /dev/null +++ b/regression-test/suites/variant_p2/unresovled_sql/affinityByIssuesAndPRs2.sql @@ -0,0 +1,15 @@ +-- ERROR: unmatched column +-- SELECT +-- cast(repo:name as string), +-- count() AS prs, +-- count(distinct cast(actor:login as string)) AS authors +-- FROM github_events +-- WHERE (type = 'IssuesEvent') AND (cast(payload:action as string) = 'opened') AND (cast(actor:login as string) IN +-- ( +-- SELECT cast(actor:login as string) +-- FROM github_events +-- WHERE (type = 'IssuesEvent') AND (cast(payload:action as string) = 'opened') AND (cast(repo:name as string) IN ('No-CQRT/GooGuns', 'ivolunteerph/ivolunteerph', 'Tribler/tribler')) +-- )) AND (lower(cast(repo:name as string)) NOT LIKE '%clickhouse%') +-- GROUP BY cast(repo:name as string) +-- ORDER BY authors DESC, prs DESC, cast(repo:name as string) ASC +-- LIMIT 50 diff --git a/regression-test/suites/variant_p2/unresovled_sql/repositoriesByAmountOfModifiedCode.sql b/regression-test/suites/variant_p2/unresovled_sql/repositoriesByAmountOfModifiedCode.sql new file mode 100644 index 0000000000..f232a6f032 --- /dev/null +++ b/regression-test/suites/variant_p2/unresovled_sql/repositoriesByAmountOfModifiedCode.sql @@ -0,0 +1,13 @@ +-- ERROR missmatched result +-- SELECT +-- cast(repo:name as string) as repo_name, +-- count() AS prs, +-- count(distinct cast(actor:login as string)) AS authors, +-- sum(cast(payload:pull_request.additions as int)) AS adds, +-- sum(cast(v:payload.pull_request.deletions as int)) AS dels +-- FROM github_events +-- WHERE (type = 'PullRequestEvent') AND (cast(payload:action as string) = 'opened') AND (cast(payload:pull_request.additions as int) < 10000) AND 
(cast(v:payload.pull_request.deletions as int) < 10000) +-- GROUP BY repo_name +-- HAVING (adds / dels) < 10 +-- ORDER BY adds + dels DESC, 1 +-- LIMIT 50 diff --git a/regression-test/suites/variant_p2/unresovled_sql/repositoriesByTheNumberOfPushes.sql b/regression-test/suites/variant_p2/unresovled_sql/repositoriesByTheNumberOfPushes.sql new file mode 100644 index 0000000000..b5c9ac1fb2 --- /dev/null +++ b/regression-test/suites/variant_p2/unresovled_sql/repositoriesByTheNumberOfPushes.sql @@ -0,0 +1,18 @@ +--ERROR: crash column.h:496] not support +-- SELECT +-- cast(repo:name as string), +-- count() AS pushes, +-- count(distinct cast(actor:login as string)) AS authors +-- FROM github_events +-- WHERE (type = 'PushEvent') AND (cast(repo:name as string) IN +-- ( +-- SELECT cast(repo:name as string) +-- FROM github_events +-- WHERE type = 'WatchEvent' +-- GROUP BY cast(repo:name as string) +-- ORDER BY count() DESC +-- LIMIT 10000 +-- )) +-- GROUP BY cast(repo:name as string) +-- ORDER BY count() DESC +-- LIMIT 50 diff --git a/regression-test/suites/variant_p2/unresovled_sql/repositoryAffinityList1.sql b/regression-test/suites/variant_p2/unresovled_sql/repositoryAffinityList1.sql new file mode 100644 index 0000000000..149e31e031 --- /dev/null +++ b/regression-test/suites/variant_p2/unresovled_sql/repositoryAffinityList1.sql @@ -0,0 +1,14 @@ +-- ERROR: unmatched column +-- SELECT +-- cast(repo:name as string) as repo_name, +-- count() AS stars +-- FROM github_events +-- WHERE (type = 'WatchEvent') AND (cast(actor:login as string) IN +-- ( +-- SELECT cast(actor:login as string) +-- FROM github_events +-- WHERE (type = 'WatchEvent') AND (cast(repo:name as string) IN ('apache/spark', 'prakhar1989/awesome-courses')) +-- )) AND (cast(repo:name as string) NOT IN ('ClickHouse/ClickHouse', 'yandex/ClickHouse')) +-- GROUP BY repo_name +-- ORDER BY stars DESC, repo_name +-- LIMIT 50 diff --git a/regression-test/suites/variant_p2/unresovled_sql/starsFromHeavyGithubUsers1.sql 
b/regression-test/suites/variant_p2/unresovled_sql/starsFromHeavyGithubUsers1.sql new file mode 100644 index 0000000000..0c5f159a8b --- /dev/null +++ b/regression-test/suites/variant_p2/unresovled_sql/starsFromHeavyGithubUsers1.sql @@ -0,0 +1,14 @@ +-- ERROR: unmatched column +-- SELECT +-- cast(repo:name as string), +-- count() +-- FROM github_events +-- WHERE (type = 'WatchEvent') AND (cast(actor:login as string) IN +-- ( +-- SELECT cast(actor:login as string) +-- FROM github_events +-- WHERE (type = 'PullRequestEvent') AND (cast(payload:action as string) = 'opened') +-- )) +-- GROUP BY cast(repo:name as string) +-- ORDER BY count() DESC +-- LIMIT 50 diff --git a/regression-test/suites/variant_p2/unresovled_sql/starsFromHeavyGithubUsers2.sql b/regression-test/suites/variant_p2/unresovled_sql/starsFromHeavyGithubUsers2.sql new file mode 100644 index 0000000000..e4ceb1b21b --- /dev/null +++ b/regression-test/suites/variant_p2/unresovled_sql/starsFromHeavyGithubUsers2.sql @@ -0,0 +1,16 @@ +-- ERROR: crash not support replicate +-- SELECT +-- cast(repo:name as string), +-- count() +-- FROM github_events +-- WHERE (type = 'WatchEvent') AND (cast(actor:login as string) IN +-- ( +-- SELECT cast(actor:login as string) +-- FROM github_events +-- WHERE (type = 'PullRequestEvent') AND (cast(payload:action as string) = 'opened') +-- GROUP BY cast(actor:login as string) +-- HAVING count() >= 2 +-- )) +-- GROUP BY cast(repo:name as string) +-- ORDER BY 1, count() DESC, 1 +-- LIMIT 50 diff --git a/regression-test/suites/variant_p2/unresovled_sql/whoAreAllThosePeopleGivingStars3.sql b/regression-test/suites/variant_p2/unresovled_sql/whoAreAllThosePeopleGivingStars3.sql new file mode 100644 index 0000000000..58083528b5 --- /dev/null +++ b/regression-test/suites/variant_p2/unresovled_sql/whoAreAllThosePeopleGivingStars3.sql @@ -0,0 +1,17 @@ +-- ERROR: unmatched column +-- SELECT +-- cast(repo:name as string), +-- count() AS stars +-- FROM github_events +-- WHERE (type = 
'WatchEvent') AND (cast(repo:name as string) IN +-- ( +-- SELECT cast(repo:name as string) +-- FROM github_events +-- WHERE (type = 'WatchEvent') AND (cast(actor:login as string) = 'cliffordfajardo') +-- )) +-- GROUP BY cast(repo:name as string) +-- ORDER BY stars DESC +-- LIMIT 50 + + +