From fa3e1dd4d43d7d72eba24ef2e900f96a3a4bb38f Mon Sep 17 00:00:00 2001 From: tiancaiamao Date: Tue, 30 Dec 2025 14:12:53 +0800 Subject: [PATCH] util/traceevent: make flaky test TestFlightRecorder stable (#65317) close pingcap/tidb#65230 --- pkg/util/traceevent/BUILD.bazel | 9 +- pkg/util/traceevent/flightrecorder.go | 14 +- pkg/util/traceevent/flightrecorder_test.go | 18 +++ pkg/util/traceevent/test/BUILD.bazel | 21 +++ .../traceevent/{ => test}/integration_test.go | 18 ++- pkg/util/traceevent/traceevent.go | 30 +++- pkg/util/traceevent/traceevent_test.go | 152 +++++++++++------- 7 files changed, 177 insertions(+), 85 deletions(-) create mode 100644 pkg/util/traceevent/test/BUILD.bazel rename pkg/util/traceevent/{ => test}/integration_test.go (96%) diff --git a/pkg/util/traceevent/BUILD.bazel b/pkg/util/traceevent/BUILD.bazel index 5738dce3a2..43020ca97f 100644 --- a/pkg/util/traceevent/BUILD.bazel +++ b/pkg/util/traceevent/BUILD.bazel @@ -10,6 +10,8 @@ go_library( importpath = "github.com/pingcap/tidb/pkg/util/traceevent", visibility = ["//visibility:public"], deps = [ + "//pkg/config/kerneltype", + "//pkg/util/intest", "//pkg/util/logutil", "//pkg/util/tracing", "@com_github_tikv_client_go_v2//trace", @@ -24,20 +26,13 @@ go_test( srcs = [ "adapter_test.go", "flightrecorder_test.go", - "integration_test.go", "traceevent_test.go", ], embed = [":traceevent"], flaky = True, shard_count = 1, deps = [ - "//pkg/config", "//pkg/config/kerneltype", - "//pkg/keyspace", - "//pkg/kv", - "//pkg/session", - "//pkg/testkit", - "//pkg/util/logutil", "//pkg/util/tracing", "@com_github_stretchr_testify//require", "@com_github_tikv_client_go_v2//trace", diff --git a/pkg/util/traceevent/flightrecorder.go b/pkg/util/traceevent/flightrecorder.go index faa8d631d0..15263b571f 100644 --- a/pkg/util/traceevent/flightrecorder.go +++ b/pkg/util/traceevent/flightrecorder.go @@ -42,10 +42,10 @@ var globalHTTPFlightRecorder atomic.Pointer[HTTPFlightRecorder] // TODO: rename HTTPFlightRecorder to FlightRecorder as it may sink to log now instead of just HTTP // TODO: remove the old global flight recorder, clean up code. type HTTPFlightRecorder struct { - ch chan<- []Event - oldEnabledCategories TraceCategory - counter atomic.Int64 // used when dump trigger config is sampling - Config *FlightRecorderConfig + ch chan<- []Event + enabledCategories TraceCategory + counter atomic.Int64 // used when dump trigger config is sampling + Config *FlightRecorderConfig compiledDumpTriggerConfig } @@ -453,7 +453,7 @@ func newHTTPFlightRecorder(config *FlightRecorderConfig) (*HTTPFlightRecorder, e categories := parseCategories(config.EnabledCategories) ret := &HTTPFlightRecorder{ - oldEnabledCategories: tracing.GetEnabledCategories(), + enabledCategories: categories, Config: config, compiledDumpTriggerConfig: compiled, } @@ -461,7 +461,6 @@ func newHTTPFlightRecorder(config *FlightRecorderConfig) (*HTTPFlightRecorder, e zap.Stringer("category", categories), zap.Any("mapping", compiled.nameMapping), zap.Uint64s("truthTable", ret.truthTable)) - SetCategories(categories) globalHTTPFlightRecorder.Store(ret) return ret, nil } @@ -488,8 +487,7 @@ func GetFlightRecorder() *HTTPFlightRecorder { } // Close closes the HTTP flight recorder. -func (r *HTTPFlightRecorder) Close() { - tracing.SetCategories(r.oldEnabledCategories) +func (*HTTPFlightRecorder) Close() { globalHTTPFlightRecorder.Store(nil) } diff --git a/pkg/util/traceevent/flightrecorder_test.go b/pkg/util/traceevent/flightrecorder_test.go index 8edc11e595..c3dc804fd1 100644 --- a/pkg/util/traceevent/flightrecorder_test.go +++ b/pkg/util/traceevent/flightrecorder_test.go @@ -22,6 +22,24 @@ import ( "github.com/stretchr/testify/require" ) +// Those functions are used in test only. +func (r *HTTPFlightRecorder) SetCategories(categories TraceCategory) { + r.enabledCategories = categories +} + +func (r *HTTPFlightRecorder) Disable(categories TraceCategory) { + current := r.enabledCategories + next := current &^ categories + r.enabledCategories = next +} + +// Enable enables trace events for the specified categories. +func (r *HTTPFlightRecorder) Enable(categories TraceCategory) { + current := r.enabledCategories + next := current | categories + r.enabledCategories = next +} + func testFlightRecorderConfigGoodCase(t *testing.T) { conf1 := `{ "enabled_categories": [ diff --git a/pkg/util/traceevent/test/BUILD.bazel b/pkg/util/traceevent/test/BUILD.bazel new file mode 100644 index 0000000000..3ff2403542 --- /dev/null +++ b/pkg/util/traceevent/test/BUILD.bazel @@ -0,0 +1,21 @@ +load("@io_bazel_rules_go//go:def.bzl", "go_test") + +go_test( + name = "test_test", + timeout = "short", + srcs = ["integration_test.go"], + flaky = True, + deps = [ + "//pkg/config", + "//pkg/config/kerneltype", + "//pkg/keyspace", + "//pkg/kv", + "//pkg/session", + "//pkg/testkit", + "//pkg/util/logutil", + "//pkg/util/traceevent", + "//pkg/util/tracing", + "@com_github_stretchr_testify//require", + "@com_github_tikv_client_go_v2//trace", + ], +) diff --git a/pkg/util/traceevent/integration_test.go b/pkg/util/traceevent/test/integration_test.go similarity index 96% rename from pkg/util/traceevent/integration_test.go rename to pkg/util/traceevent/test/integration_test.go index 079b0ee014..f687b05e91 100644 --- a/pkg/util/traceevent/integration_test.go +++ b/pkg/util/traceevent/test/integration_test.go @@ -69,9 +69,13 @@ func TestPrevTraceIDPersistence(t *testing.T) { }() // Enable all categories for this test - prevCategories := traceevent.GetEnabledCategories() - traceevent.SetCategories(traceevent.AllCategories) - defer traceevent.SetCategories(prevCategories) + var conf traceevent.FlightRecorderConfig + conf.Initialize() + conf.EnabledCategories = []string{"*"} + err = traceevent.StartLogFlightRecorder(&conf) + require.NoError(t, err) + fr := traceevent.GetFlightRecorder() + defer fr.Close() recorder := traceevent.NewRingBufferSink(100) prevSink := traceevent.CurrentSink() @@ -164,6 +168,14 @@ func TestTraceControlIntegration(t *testing.T) { tracing.Enable(tracing.TiKVRequest) defer tracing.SetCategories(prevCategories) + var conf traceevent.FlightRecorderConfig + conf.Initialize() + conf.EnabledCategories = []string{"tikv_request"} + err := traceevent.StartLogFlightRecorder(&conf) + require.NoError(t, err) + fr := traceevent.GetFlightRecorder() + defer fr.Close() + ctx := context.Background() flags := trace.GetTraceControlFlags(ctx) require.True(t, flags.Has(trace.FlagTiKVCategoryRequest)) diff --git a/pkg/util/traceevent/traceevent.go b/pkg/util/traceevent/traceevent.go index 7bdf68b102..f162bee555 100644 --- a/pkg/util/traceevent/traceevent.go +++ b/pkg/util/traceevent/traceevent.go @@ -28,6 +28,8 @@ import ( "time" "unsafe" + "github.com/pingcap/tidb/pkg/config/kerneltype" + "github.com/pingcap/tidb/pkg/util/intest" "github.com/pingcap/tidb/pkg/util/logutil" "github.com/pingcap/tidb/pkg/util/tracing" "github.com/tikv/client-go/v2/trace" @@ -111,16 +113,28 @@ func init() { var Enable = tracing.Enable // IsEnabled returns whether the specified category is enabled. -var IsEnabled = tracing.IsEnabled - -// Disable disables trace events for the specified categories. -var Disable = tracing.Disable - -// SetCategories sets the enabled categories to exactly the specified value. -var SetCategories = tracing.SetCategories +func IsEnabled(category tracing.TraceCategory) bool { + if kerneltype.IsClassic() && !intest.InTest { + return false + } + fr := GetFlightRecorder() + if fr == nil { + return false + } + if uint64(fr.enabledCategories)&uint64(category) == 0 { + return false + } + return true +} // GetEnabledCategories returns the currently enabled categories. -var GetEnabledCategories = tracing.GetEnabledCategories +func GetEnabledCategories() TraceCategory { + fr := GetFlightRecorder() + if fr == nil { + return TraceCategory(0) + } + return fr.enabledCategories +} // NormalizeMode converts a user-supplied tracing mode string into its canonical representation. func NormalizeMode(mode string) (string, error) { diff --git a/pkg/util/traceevent/traceevent_test.go b/pkg/util/traceevent/traceevent_test.go index da88c3866a..34e578db1c 100644 --- a/pkg/util/traceevent/traceevent_test.go +++ b/pkg/util/traceevent/traceevent_test.go @@ -25,6 +25,16 @@ import ( "go.uber.org/zap" ) +func TestSuite(t *testing.T) { + // Those tests use global variables, make them run in sequence to avoid flaky. + t.Run("TestTraceEventCategories", testTraceEventCategories) + t.Run("TestTraceEventCategoryFiltering", testTraceEventCategoryFiltering) + t.Run("TestTraceEventRecordsEvent", testTraceEventRecordsEvent) + t.Run("TestTraceEventCarriesTraceID", testTraceEventCarriesTraceID) + t.Run("TestTraceEventLoggingSwitch", testTraceEventLoggingSwitch) + t.Run("TestFlightRecorderCoolingOff", testFlightRecorderCoolingOff) +} + func installRecorderSink(t *testing.T, capacity int) *RingBufferSink { t.Helper() @@ -37,42 +47,47 @@ func installRecorderSink(t *testing.T, capacity int) *RingBufferSink { return recorder } -func TestTraceEventCategories(t *testing.T) { +func testTraceEventCategories(t *testing.T) { if kerneltype.IsClassic() { t.Skip("trace events only work for next-gen kernel") } - original := GetEnabledCategories() - t.Cleanup(func() { - SetCategories(original) - }) + + var conf FlightRecorderConfig + conf.Initialize() + conf.EnabledCategories = []string{"*"} + err := StartLogFlightRecorder(&conf) + require.NoError(t, err) + fr := GetFlightRecorder() + defer fr.Close() require.True(t, IsEnabled(TxnLifecycle)) - Disable(TxnLifecycle) + fr.Disable(TxnLifecycle) require.False(t, IsEnabled(TxnLifecycle)) - Enable(TxnLifecycle) + fr.Enable(TxnLifecycle) require.True(t, IsEnabled(TxnLifecycle)) - SetCategories(0) + fr.SetCategories(0) require.False(t, IsEnabled(TxnLifecycle)) - SetCategories(AllCategories) + fr.SetCategories(AllCategories) require.True(t, IsEnabled(TxnLifecycle)) } -func TestTraceEventCategoryFiltering(t *testing.T) { +func testTraceEventCategoryFiltering(t *testing.T) { if kerneltype.IsClassic() { t.Skip("trace events only work for next-gen kernel") } - originalCategories := GetEnabledCategories() - prevMode := CurrentMode() - t.Cleanup(func() { - SetCategories(originalCategories) - _, _ = SetMode(prevMode) - FlightRecorder().DiscardOrFlush() - }) - SetCategories(0) + var conf FlightRecorderConfig + conf.Initialize() + conf.EnabledCategories = []string{"*"} + err := StartLogFlightRecorder(&conf) + require.NoError(t, err) + fr := GetFlightRecorder() + defer fr.Close() + + fr.SetCategories(0) _, _ = SetMode(ModeFull) FlightRecorder().DiscardOrFlush() recorder := installRecorderSink(t, 8) @@ -109,21 +124,20 @@ func TestTraceEventModes(t *testing.T) { require.Error(t, err) } -func TestTraceEventRecordsEvent(t *testing.T) { +func testTraceEventRecordsEvent(t *testing.T) { if kerneltype.IsClassic() { t.Skip("trace events only work for next-gen kernel") } - originalCategories := GetEnabledCategories() - prevMode := CurrentMode() - prevSink := CurrentSink() - t.Cleanup(func() { - SetCategories(originalCategories) - _, _ = SetMode(prevMode) - SetSink(prevSink) - FlightRecorder().DiscardOrFlush() - }) - SetCategories(AllCategories) + var conf FlightRecorderConfig + conf.Initialize() + conf.EnabledCategories = []string{"*"} + err := StartLogFlightRecorder(&conf) + require.NoError(t, err) + fr := GetFlightRecorder() + defer fr.Close() + + fr.SetCategories(AllCategories) _, _ = SetMode(ModeFull) FlightRecorder().DiscardOrFlush() recorder := installRecorderSink(t, 8) @@ -147,19 +161,20 @@ func TestTraceEventRecordsEvent(t *testing.T) { require.Len(t, recorded[0].Fields, 2) } -func TestTraceEventCarriesTraceID(t *testing.T) { +func testTraceEventCarriesTraceID(t *testing.T) { if kerneltype.IsClassic() { t.Skip("trace events only work for next-gen kernel") } - originalCategories := GetEnabledCategories() - prevMode := CurrentMode() - t.Cleanup(func() { - SetCategories(originalCategories) - _, _ = SetMode(prevMode) - FlightRecorder().DiscardOrFlush() - }) - SetCategories(AllCategories) + var conf FlightRecorderConfig + conf.Initialize() + conf.EnabledCategories = []string{"*"} + err := StartLogFlightRecorder(&conf) + require.NoError(t, err) + fr := GetFlightRecorder() + defer fr.Close() + + fr.SetCategories(AllCategories) _, _ = SetMode(ModeFull) FlightRecorder().DiscardOrFlush() @@ -172,19 +187,20 @@ func TestTraceEventCarriesTraceID(t *testing.T) { require.Equal(t, rawTrace, events[0].TraceID) } -func TestTraceEventLoggingSwitch(t *testing.T) { +func testTraceEventLoggingSwitch(t *testing.T) { if kerneltype.IsClassic() { t.Skip("trace events only work for next-gen kernel") } - originalCategories := GetEnabledCategories() - prevMode := CurrentMode() - t.Cleanup(func() { - SetCategories(originalCategories) - _, _ = SetMode(prevMode) - FlightRecorder().DiscardOrFlush() - }) - SetCategories(AllCategories) + var conf FlightRecorderConfig + conf.Initialize() + conf.EnabledCategories = []string{"*"} + err := StartLogFlightRecorder(&conf) + require.NoError(t, err) + fr := GetFlightRecorder() + defer fr.Close() + + fr.SetCategories(AllCategories) _, _ = SetMode(ModeBase) FlightRecorder().DiscardOrFlush() recorder := installRecorderSink(t, 8) @@ -200,8 +216,8 @@ func TestTraceEventLoggingSwitch(t *testing.T) { _, _ = SetMode(ModeFull) TraceEvent(ctx, TxnLifecycle, "enabled-log", zap.Int("value", 2)) - fr := FlightRecorder().Snapshot() - require.Len(t, fr, flightBefore+2) + fr1 := FlightRecorder().Snapshot() + require.Len(t, fr1, flightBefore+2) recorded := recorder.Snapshot() require.Len(t, recorded, disabledLogged+1) require.Equal(t, "enabled-log", recorded[len(recorded)-1].Name) @@ -265,12 +281,18 @@ func TestCategoryNames(t *testing.T) { // BenchmarkTraceEventDisabled benchmarks the overhead when tracing is disabled. func BenchmarkTraceEventDisabled(b *testing.B) { ctx := context.Background() - prevCategories := GetEnabledCategories() prevMode := CurrentMode() - SetCategories(AllCategories) + + var conf FlightRecorderConfig + conf.Initialize() + conf.EnabledCategories = []string{"*"} + err := StartLogFlightRecorder(&conf) + require.NoError(b, err) + fr := GetFlightRecorder() + defer fr.Close() + _, _ = SetMode(ModeOff) b.Cleanup(func() { - SetCategories(prevCategories) _, _ = SetMode(prevMode) }) @@ -285,12 +307,18 @@ func BenchmarkTraceEventDisabled(b *testing.B) { // BenchmarkTraceEventEnabled benchmarks the overhead when tracing is enabled. func BenchmarkTraceEventEnabled(b *testing.B) { ctx := context.Background() - prevCategories := GetEnabledCategories() + var conf FlightRecorderConfig + conf.Initialize() + conf.EnabledCategories = []string{"*"} + err := StartLogFlightRecorder(&conf) + require.NoError(b, err) + fr := GetFlightRecorder() + defer fr.Close() + prevMode := CurrentMode() Enable(TxnLifecycle) _, _ = SetMode(ModeFull) b.Cleanup(func() { - SetCategories(prevCategories) _, _ = SetMode(prevMode) }) @@ -300,7 +328,6 @@ func BenchmarkTraceEventEnabled(b *testing.B) { zap.String("key", "value"), zap.Int("iteration", i)) } - SetCategories(0) } func extractNames(events []Event) []string { @@ -311,20 +338,27 @@ func extractNames(events []Event) []string { return names } -func TestFlightRecorderCoolingOff(t *testing.T) { +func testFlightRecorderCoolingOff(t *testing.T) { if kerneltype.IsClassic() { t.Skip("trace events only work for next-gen kernel") } - originalCategories := GetEnabledCategories() + prevMode := CurrentMode() t.Cleanup(func() { - SetCategories(originalCategories) _, _ = SetMode(prevMode) FlightRecorder().DiscardOrFlush() lastDumpTime.Store(0) // Reset cooling-off state }) - SetCategories(AllCategories) + var conf FlightRecorderConfig + conf.Initialize() + conf.EnabledCategories = []string{"*"} + err := StartLogFlightRecorder(&conf) + require.NoError(t, err) + fr := GetFlightRecorder() + defer fr.Close() + + fr.SetCategories(AllCategories) _, _ = SetMode(ModeFull) FlightRecorder().DiscardOrFlush() lastDumpTime.Store(0) // Reset cooling-off state