util/traceevent: make flaky test TestFlightRecorder stable (#65317)

close pingcap/tidb#65230
This commit is contained in:
tiancaiamao
2025-12-30 14:12:53 +08:00
committed by GitHub
parent 79ec3b66cf
commit fa3e1dd4d4
7 changed files with 177 additions and 85 deletions

View File

@ -10,6 +10,8 @@ go_library(
importpath = "github.com/pingcap/tidb/pkg/util/traceevent",
visibility = ["//visibility:public"],
deps = [
"//pkg/config/kerneltype",
"//pkg/util/intest",
"//pkg/util/logutil",
"//pkg/util/tracing",
"@com_github_tikv_client_go_v2//trace",
@ -24,20 +26,13 @@ go_test(
srcs = [
"adapter_test.go",
"flightrecorder_test.go",
"integration_test.go",
"traceevent_test.go",
],
embed = [":traceevent"],
flaky = True,
shard_count = 1,
deps = [
"//pkg/config",
"//pkg/config/kerneltype",
"//pkg/keyspace",
"//pkg/kv",
"//pkg/session",
"//pkg/testkit",
"//pkg/util/logutil",
"//pkg/util/tracing",
"@com_github_stretchr_testify//require",
"@com_github_tikv_client_go_v2//trace",

View File

@ -42,10 +42,10 @@ var globalHTTPFlightRecorder atomic.Pointer[HTTPFlightRecorder]
// TODO: rename HTTPFlightRecorder to FlightRecorder as it may sink to log now instead of just HTTP
// TODO: remove the old global flight recorder, clean up code.
type HTTPFlightRecorder struct {
ch chan<- []Event
oldEnabledCategories TraceCategory
counter atomic.Int64 // used when dump trigger config is sampling
Config *FlightRecorderConfig
ch chan<- []Event
enabledCategories TraceCategory
counter atomic.Int64 // used when dump trigger config is sampling
Config *FlightRecorderConfig
compiledDumpTriggerConfig
}
@ -453,7 +453,7 @@ func newHTTPFlightRecorder(config *FlightRecorderConfig) (*HTTPFlightRecorder, e
categories := parseCategories(config.EnabledCategories)
ret := &HTTPFlightRecorder{
oldEnabledCategories: tracing.GetEnabledCategories(),
enabledCategories: categories,
Config: config,
compiledDumpTriggerConfig: compiled,
}
@ -461,7 +461,6 @@ func newHTTPFlightRecorder(config *FlightRecorderConfig) (*HTTPFlightRecorder, e
zap.Stringer("category", categories),
zap.Any("mapping", compiled.nameMapping),
zap.Uint64s("truthTable", ret.truthTable))
SetCategories(categories)
globalHTTPFlightRecorder.Store(ret)
return ret, nil
}
@ -488,8 +487,7 @@ func GetFlightRecorder() *HTTPFlightRecorder {
}
// Close closes the HTTP flight recorder.
func (r *HTTPFlightRecorder) Close() {
tracing.SetCategories(r.oldEnabledCategories)
func (*HTTPFlightRecorder) Close() {
globalHTTPFlightRecorder.Store(nil)
}

View File

@ -22,6 +22,24 @@ import (
"github.com/stretchr/testify/require"
)
// Those functions are used in test only.
func (r *HTTPFlightRecorder) SetCategories(categories TraceCategory) {
r.enabledCategories = categories
}
func (r *HTTPFlightRecorder) Disable(categories TraceCategory) {
current := r.enabledCategories
next := current &^ categories
r.enabledCategories = next
}
// Enable enables trace events for the specified categories.
func (r *HTTPFlightRecorder) Enable(categories TraceCategory) {
current := r.enabledCategories
next := current | categories
r.enabledCategories = next
}
func testFlightRecorderConfigGoodCase(t *testing.T) {
conf1 := `{
"enabled_categories": [

View File

@ -0,0 +1,21 @@
load("@io_bazel_rules_go//go:def.bzl", "go_test")
go_test(
name = "test_test",
timeout = "short",
srcs = ["integration_test.go"],
flaky = True,
deps = [
"//pkg/config",
"//pkg/config/kerneltype",
"//pkg/keyspace",
"//pkg/kv",
"//pkg/session",
"//pkg/testkit",
"//pkg/util/logutil",
"//pkg/util/traceevent",
"//pkg/util/tracing",
"@com_github_stretchr_testify//require",
"@com_github_tikv_client_go_v2//trace",
],
)

View File

@ -69,9 +69,13 @@ func TestPrevTraceIDPersistence(t *testing.T) {
}()
// Enable all categories for this test
prevCategories := traceevent.GetEnabledCategories()
traceevent.SetCategories(traceevent.AllCategories)
defer traceevent.SetCategories(prevCategories)
var conf traceevent.FlightRecorderConfig
conf.Initialize()
conf.EnabledCategories = []string{"*"}
err = traceevent.StartLogFlightRecorder(&conf)
require.NoError(t, err)
fr := traceevent.GetFlightRecorder()
defer fr.Close()
recorder := traceevent.NewRingBufferSink(100)
prevSink := traceevent.CurrentSink()
@ -164,6 +168,14 @@ func TestTraceControlIntegration(t *testing.T) {
tracing.Enable(tracing.TiKVRequest)
defer tracing.SetCategories(prevCategories)
var conf traceevent.FlightRecorderConfig
conf.Initialize()
conf.EnabledCategories = []string{"tikv_request"}
err := traceevent.StartLogFlightRecorder(&conf)
require.NoError(t, err)
fr := traceevent.GetFlightRecorder()
defer fr.Close()
ctx := context.Background()
flags := trace.GetTraceControlFlags(ctx)
require.True(t, flags.Has(trace.FlagTiKVCategoryRequest))

View File

@ -28,6 +28,8 @@ import (
"time"
"unsafe"
"github.com/pingcap/tidb/pkg/config/kerneltype"
"github.com/pingcap/tidb/pkg/util/intest"
"github.com/pingcap/tidb/pkg/util/logutil"
"github.com/pingcap/tidb/pkg/util/tracing"
"github.com/tikv/client-go/v2/trace"
@ -111,16 +113,28 @@ func init() {
var Enable = tracing.Enable
// IsEnabled returns whether the specified category is enabled.
var IsEnabled = tracing.IsEnabled
// Disable disables trace events for the specified categories.
var Disable = tracing.Disable
// SetCategories sets the enabled categories to exactly the specified value.
var SetCategories = tracing.SetCategories
func IsEnabled(category tracing.TraceCategory) bool {
if kerneltype.IsClassic() && !intest.InTest {
return false
}
fr := GetFlightRecorder()
if fr == nil {
return false
}
if uint64(fr.enabledCategories)&uint64(category) == 0 {
return false
}
return true
}
// GetEnabledCategories returns the currently enabled categories.
var GetEnabledCategories = tracing.GetEnabledCategories
func GetEnabledCategories() TraceCategory {
fr := GetFlightRecorder()
if fr == nil {
return TraceCategory(0)
}
return fr.enabledCategories
}
// NormalizeMode converts a user-supplied tracing mode string into its canonical representation.
func NormalizeMode(mode string) (string, error) {

View File

@ -25,6 +25,16 @@ import (
"go.uber.org/zap"
)
func TestSuite(t *testing.T) {
// Those tests use global variables, make them run in sequence to avoid flaky.
t.Run("TestTraceEventCategories", testTraceEventCategories)
t.Run("TestTraceEventCategoryFiltering", testTraceEventCategoryFiltering)
t.Run("TestTraceEventRecordsEvent", testTraceEventRecordsEvent)
t.Run("TestTraceEventCarriesTraceID", testTraceEventCarriesTraceID)
t.Run("TestTraceEventLoggingSwitch", testTraceEventLoggingSwitch)
t.Run("TestFlightRecorderCoolingOff", testFlightRecorderCoolingOff)
}
func installRecorderSink(t *testing.T, capacity int) *RingBufferSink {
t.Helper()
@ -37,42 +47,47 @@ func installRecorderSink(t *testing.T, capacity int) *RingBufferSink {
return recorder
}
func TestTraceEventCategories(t *testing.T) {
func testTraceEventCategories(t *testing.T) {
if kerneltype.IsClassic() {
t.Skip("trace events only work for next-gen kernel")
}
original := GetEnabledCategories()
t.Cleanup(func() {
SetCategories(original)
})
var conf FlightRecorderConfig
conf.Initialize()
conf.EnabledCategories = []string{"*"}
err := StartLogFlightRecorder(&conf)
require.NoError(t, err)
fr := GetFlightRecorder()
defer fr.Close()
require.True(t, IsEnabled(TxnLifecycle))
Disable(TxnLifecycle)
fr.Disable(TxnLifecycle)
require.False(t, IsEnabled(TxnLifecycle))
Enable(TxnLifecycle)
fr.Enable(TxnLifecycle)
require.True(t, IsEnabled(TxnLifecycle))
SetCategories(0)
fr.SetCategories(0)
require.False(t, IsEnabled(TxnLifecycle))
SetCategories(AllCategories)
fr.SetCategories(AllCategories)
require.True(t, IsEnabled(TxnLifecycle))
}
func TestTraceEventCategoryFiltering(t *testing.T) {
func testTraceEventCategoryFiltering(t *testing.T) {
if kerneltype.IsClassic() {
t.Skip("trace events only work for next-gen kernel")
}
originalCategories := GetEnabledCategories()
prevMode := CurrentMode()
t.Cleanup(func() {
SetCategories(originalCategories)
_, _ = SetMode(prevMode)
FlightRecorder().DiscardOrFlush()
})
SetCategories(0)
var conf FlightRecorderConfig
conf.Initialize()
conf.EnabledCategories = []string{"*"}
err := StartLogFlightRecorder(&conf)
require.NoError(t, err)
fr := GetFlightRecorder()
defer fr.Close()
fr.SetCategories(0)
_, _ = SetMode(ModeFull)
FlightRecorder().DiscardOrFlush()
recorder := installRecorderSink(t, 8)
@ -109,21 +124,20 @@ func TestTraceEventModes(t *testing.T) {
require.Error(t, err)
}
func TestTraceEventRecordsEvent(t *testing.T) {
func testTraceEventRecordsEvent(t *testing.T) {
if kerneltype.IsClassic() {
t.Skip("trace events only work for next-gen kernel")
}
originalCategories := GetEnabledCategories()
prevMode := CurrentMode()
prevSink := CurrentSink()
t.Cleanup(func() {
SetCategories(originalCategories)
_, _ = SetMode(prevMode)
SetSink(prevSink)
FlightRecorder().DiscardOrFlush()
})
SetCategories(AllCategories)
var conf FlightRecorderConfig
conf.Initialize()
conf.EnabledCategories = []string{"*"}
err := StartLogFlightRecorder(&conf)
require.NoError(t, err)
fr := GetFlightRecorder()
defer fr.Close()
fr.SetCategories(AllCategories)
_, _ = SetMode(ModeFull)
FlightRecorder().DiscardOrFlush()
recorder := installRecorderSink(t, 8)
@ -147,19 +161,20 @@ func TestTraceEventRecordsEvent(t *testing.T) {
require.Len(t, recorded[0].Fields, 2)
}
func TestTraceEventCarriesTraceID(t *testing.T) {
func testTraceEventCarriesTraceID(t *testing.T) {
if kerneltype.IsClassic() {
t.Skip("trace events only work for next-gen kernel")
}
originalCategories := GetEnabledCategories()
prevMode := CurrentMode()
t.Cleanup(func() {
SetCategories(originalCategories)
_, _ = SetMode(prevMode)
FlightRecorder().DiscardOrFlush()
})
SetCategories(AllCategories)
var conf FlightRecorderConfig
conf.Initialize()
conf.EnabledCategories = []string{"*"}
err := StartLogFlightRecorder(&conf)
require.NoError(t, err)
fr := GetFlightRecorder()
defer fr.Close()
fr.SetCategories(AllCategories)
_, _ = SetMode(ModeFull)
FlightRecorder().DiscardOrFlush()
@ -172,19 +187,20 @@ func TestTraceEventCarriesTraceID(t *testing.T) {
require.Equal(t, rawTrace, events[0].TraceID)
}
func TestTraceEventLoggingSwitch(t *testing.T) {
func testTraceEventLoggingSwitch(t *testing.T) {
if kerneltype.IsClassic() {
t.Skip("trace events only work for next-gen kernel")
}
originalCategories := GetEnabledCategories()
prevMode := CurrentMode()
t.Cleanup(func() {
SetCategories(originalCategories)
_, _ = SetMode(prevMode)
FlightRecorder().DiscardOrFlush()
})
SetCategories(AllCategories)
var conf FlightRecorderConfig
conf.Initialize()
conf.EnabledCategories = []string{"*"}
err := StartLogFlightRecorder(&conf)
require.NoError(t, err)
fr := GetFlightRecorder()
defer fr.Close()
fr.SetCategories(AllCategories)
_, _ = SetMode(ModeBase)
FlightRecorder().DiscardOrFlush()
recorder := installRecorderSink(t, 8)
@ -200,8 +216,8 @@ func TestTraceEventLoggingSwitch(t *testing.T) {
_, _ = SetMode(ModeFull)
TraceEvent(ctx, TxnLifecycle, "enabled-log", zap.Int("value", 2))
fr := FlightRecorder().Snapshot()
require.Len(t, fr, flightBefore+2)
fr1 := FlightRecorder().Snapshot()
require.Len(t, fr1, flightBefore+2)
recorded := recorder.Snapshot()
require.Len(t, recorded, disabledLogged+1)
require.Equal(t, "enabled-log", recorded[len(recorded)-1].Name)
@ -265,12 +281,18 @@ func TestCategoryNames(t *testing.T) {
// BenchmarkTraceEventDisabled benchmarks the overhead when tracing is disabled.
func BenchmarkTraceEventDisabled(b *testing.B) {
ctx := context.Background()
prevCategories := GetEnabledCategories()
prevMode := CurrentMode()
SetCategories(AllCategories)
var conf FlightRecorderConfig
conf.Initialize()
conf.EnabledCategories = []string{"*"}
err := StartLogFlightRecorder(&conf)
require.NoError(b, err)
fr := GetFlightRecorder()
defer fr.Close()
_, _ = SetMode(ModeOff)
b.Cleanup(func() {
SetCategories(prevCategories)
_, _ = SetMode(prevMode)
})
@ -285,12 +307,18 @@ func BenchmarkTraceEventDisabled(b *testing.B) {
// BenchmarkTraceEventEnabled benchmarks the overhead when tracing is enabled.
func BenchmarkTraceEventEnabled(b *testing.B) {
ctx := context.Background()
prevCategories := GetEnabledCategories()
var conf FlightRecorderConfig
conf.Initialize()
conf.EnabledCategories = []string{"*"}
err := StartLogFlightRecorder(&conf)
require.NoError(b, err)
fr := GetFlightRecorder()
defer fr.Close()
prevMode := CurrentMode()
Enable(TxnLifecycle)
_, _ = SetMode(ModeFull)
b.Cleanup(func() {
SetCategories(prevCategories)
_, _ = SetMode(prevMode)
})
@ -300,7 +328,6 @@ func BenchmarkTraceEventEnabled(b *testing.B) {
zap.String("key", "value"),
zap.Int("iteration", i))
}
SetCategories(0)
}
func extractNames(events []Event) []string {
@ -311,20 +338,27 @@ func extractNames(events []Event) []string {
return names
}
func TestFlightRecorderCoolingOff(t *testing.T) {
func testFlightRecorderCoolingOff(t *testing.T) {
if kerneltype.IsClassic() {
t.Skip("trace events only work for next-gen kernel")
}
originalCategories := GetEnabledCategories()
prevMode := CurrentMode()
t.Cleanup(func() {
SetCategories(originalCategories)
_, _ = SetMode(prevMode)
FlightRecorder().DiscardOrFlush()
lastDumpTime.Store(0) // Reset cooling-off state
})
SetCategories(AllCategories)
var conf FlightRecorderConfig
conf.Initialize()
conf.EnabledCategories = []string{"*"}
err := StartLogFlightRecorder(&conf)
require.NoError(t, err)
fr := GetFlightRecorder()
defer fr.Close()
fr.SetCategories(AllCategories)
_, _ = SetMode(ModeFull)
FlightRecorder().DiscardOrFlush()
lastDumpTime.Store(0) // Reset cooling-off state