Files
tidb/pkg/util/partialjson/extract.go

171 lines
4.0 KiB
Go

// Copyright 2024 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package partialjson
import (
"bytes"
"encoding/json"
"fmt"
"io"
)
// topLevelJSONTokenIter walks the members of a single top-level JSON object
// token by token, so that callers can pick out individual members without
// decoding the whole document.
type topLevelJSONTokenIter struct {
// d is the streaming decoder the tokens are pulled from.
d *json.Decoder
// level is the current nesting depth: 0 before the opening '{' of the
// top-level object has been read, 1 while positioned directly inside that
// object, and greater than 1 while inside a nested object or array.
level int
}
// newTopLevelJSONTokenIter builds an iterator over the JSON text in content.
// The iterator starts at nesting level 0, i.e. before the opening '{' has been
// consumed. Numbers are surfaced as json.Number rather than float64 because
// the decoder is switched to UseNumber mode.
func newTopLevelJSONTokenIter(content []byte) *topLevelJSONTokenIter {
	iter := new(topLevelJSONTokenIter)
	iter.d = json.NewDecoder(bytes.NewReader(content))
	iter.d.UseNumber()
	return iter
}
// unexpectedEOF maps io.EOF to io.ErrUnexpectedEOF and returns every other
// error (including nil) unchanged. It is used where running out of input means
// the JSON text was truncated rather than cleanly finished.
func unexpectedEOF(err error) error {
	switch err {
	case io.EOF:
		return io.ErrUnexpectedEOF
	default:
		return err
	}
}
// readName reads the next member name that belongs to the top level of the
// JSON object. The caller must consume the member's value with
// readOrDiscardValue before calling readName again.
//
// Errors from the underlying iteration are returned unchanged; in particular
// io.EOF is returned once the closing delimiter of the top-level object has
// been read. An error is also returned when the tokens read do not form a
// single string, which happens if readName is called while the iterator is
// positioned at a value instead of a name.
func (i *topLevelJSONTokenIter) readName() (string, error) {
	ts, err := i.next(false)
	if err != nil {
		return "", err
	}
	// A name is always a single token; several tokens mean an object or an
	// array was read instead.
	if len(ts) != 1 {
		return "", fmt.Errorf("unexpected JSON name, %v", ts)
	}
	name, ok := ts[0].(string)
	if !ok {
		// > An object is an unordered collection of zero or more name/value
		// pairs, where a name is a string...
		// https://datatracker.ietf.org/doc/html/rfc8259#section-1
		//
		// Report the offending token itself: formatting the enclosing slice
		// with %T would always print "[]json.Token" and hide the real type.
		return "", fmt.Errorf("unexpected JSON name, %T %v", ts[0], ts[0])
	}
	return name, nil
}
// readOrDiscardValue consumes the value of the member whose name was just
// returned by readName, so it must be called after readName. Pass discard=true
// when the value is not needed: the tokens of a nested object or array are
// then skipped without being collected. A scalar value is returned as a
// one-element slice regardless of discard.
func (i *topLevelJSONTokenIter) readOrDiscardValue(discard bool) ([]json.Token, error) {
	tokens, err := i.next(discard)
	return tokens, err
}
// next is the internal stepping primitive of the iterator; callers should go
// through readName / readOrDiscardValue instead.
//
// Each call yields one top-level item: either a single scalar token (a member
// name or a scalar value), or the complete token sequence of a nested object
// or array, delimiters included. When discard is true the tokens of a nested
// value are consumed but not collected, and nil is returned for it.
//
// io.EOF is returned when the closing delimiter of the top-level object is
// read. Running out of input after the opening '{' is reported as
// io.ErrUnexpectedEOF.
func (i *topLevelJSONTokenIter) next(discard bool) ([]json.Token, error) {
	// Nothing consumed yet: the text must open with '{'.
	if i.level == 0 {
		open, err := i.d.Token()
		if err != nil {
			return nil, err
		}
		if open != json.Delim('{') {
			return nil, fmt.Errorf(
				"expected '{' for topLevelJSONTokenIter, got %T %v",
				open, open,
			)
		}
		i.level++
	}

	var tokens []json.Token
	if i.level == 1 {
		tok, err := i.d.Token()
		if err != nil {
			return nil, unexpectedEOF(err)
		}
		delim, isDelim := tok.(json.Delim)
		switch {
		case !isDelim:
			// A name or a scalar value: it is complete on its own.
			return []json.Token{tok}, nil
		case delim == '}' || delim == ']':
			// Leaving the top level means the content has ended.
			i.level--
			return nil, io.EOF
		case delim == '{' || delim == '[':
			// A nested value starts here; the loop below consumes the rest.
			i.level++
			if !discard {
				tokens = make([]json.Token, 0, 16)
				tokens = append(tokens, tok)
			}
		}
	}

	// Drain tokens until the nested value is closed and we are back at the
	// top level.
	for i.level > 1 {
		tok, err := i.d.Token()
		if err != nil {
			return nil, unexpectedEOF(err)
		}
		if !discard {
			tokens = append(tokens, tok)
		}
		switch tok {
		case json.Delim('{'), json.Delim('['):
			i.level++
		case json.Delim('}'), json.Delim(']'):
			i.level--
		}
	}
	return tokens, nil
}
// ExtractTopLevelMembers extracts the tokens of the given top-level members
// from a JSON text whose root is an object. Parsing stops as soon as every
// requested name has been found, so the remainder of content is not read.
//
// The result maps each requested name to the tokens of its value: a single
// token for a scalar, or the full token sequence (delimiters included) for an
// object or array. Numbers are returned as json.Number. If a name occurs more
// than once, only the first occurrence seen while names are still outstanding
// is recorded.
//
// A nil map and an error are returned if content cannot be tokenized or if the
// object ends before all names are found; in the latter case the error is
// io.EOF.
func ExtractTopLevelMembers(content []byte, names []string) (map[string][]json.Token, error) {
	// Names still waiting to be found; duplicates in names collapse here.
	pending := make(map[string]struct{}, len(names))
	for _, n := range names {
		pending[n] = struct{}{}
	}

	found := make(map[string][]json.Token, len(names))
	iter := newTopLevelJSONTokenIter(content)
	for len(pending) != 0 {
		name, err := iter.readName()
		if err != nil {
			return nil, err
		}

		// Collect the value only when the member was asked for; otherwise
		// just skip over it.
		_, wanted := pending[name]
		tokens, err := iter.readOrDiscardValue(!wanted)
		if err != nil {
			return nil, err
		}
		if !wanted {
			continue
		}
		found[name] = tokens
		delete(pending, name)
	}
	return found, nil
}