Skip to content

Commit 68eea9a

Browse files
authored
[read_commitlog] Add summary (#4060)
1 parent af06fb6 commit 68eea9a

File tree

3 files changed

+207
-29
lines changed

3 files changed

+207
-29
lines changed

src/cmd/tools/read_commitlog/README.md

Lines changed: 22 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -7,12 +7,28 @@
77
$ git clone [email protected]:m3db/m3.git
88
$ make read_commitlog
99
$ ./bin/read_commitlog
10-
Usage: read_commitlog [-p value] [-f value]
11-
-p, --path=value
12-
Commitlog file path [e.g. /var/lib/m3db/commitlogs/commitlog-0-161023.db]
10+
Usage: read_commitlog [-a value] [-f value] [-p value] [-s value] [-t value] [parameters ...]
11+
-a, --action=value
12+
Action [print,summary]. Defaults to 'print'
1313
-f, --id-filter=value
14-
ID Contains Filter [e.g. xyz]
14+
ID Contains Filter (optional)
15+
-p, --path=value file path [e.g.
16+
/var/lib/m3db/commitlogs/commitlog-0-161023.db]
17+
-s, --id-size-filter=value
18+
ID Size (bytes) Filter (optional)
19+
-t, --top=value Print out only top N IDs
1520
16-
# example usage
17-
# read_commitlog -p /var/lib/m3db/commitlogs/commitlog-0-161023.db -f 'metric-name' > /tmp/sample-data.out
21+
# Examples.
22+
23+
# get all datapoints for a given metric
24+
$ read_commitlog -p /var/lib/m3db/commitlogs/commitlog-0-161023.db -f 'metric-name'
25+
26+
# get summary about commit log file
27+
$ read_commitlog -p /var/lib/m3db/commitlogs/commitlog-0-161023.db -a summary
28+
29+
# get summary about commit log file including top 100 largest and most frequent IDs
30+
$ read_commitlog -p /var/lib/m3db/commitlogs/commitlog-0-161023.db -a summary -t 100
31+
32+
# get summary about commit log file including only IDs above 1000 bytes
33+
$ read_commitlog -p /var/lib/m3db/commitlogs/commitlog-0-161023.db -a summary -s 1000
1834
```
Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
// Copyright (c) 2022 Uber Technologies, Inc.
2+
//
3+
// Permission is hereby granted, free of charge, to any person obtaining a copy
4+
// of this software and associated documentation files (the "Software"), to deal
5+
// in the Software without restriction, including without limitation the rights
6+
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7+
// copies of the Software, and to permit persons to whom the Software is
8+
// furnished to do so, subject to the following conditions:
9+
//
10+
// The above copyright notice and this permission notice shall be included in
11+
// all copies or substantial portions of the Software.
12+
//
13+
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14+
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15+
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16+
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17+
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18+
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19+
// THE SOFTWARE.
20+
21+
package main
22+
23+
import (
24+
"errors"
25+
"io"
26+
"log"
27+
"strings"
28+
29+
"github.com/m3db/m3/src/dbnode/persist/fs/commitlog"
30+
)
31+
32+
type filteringReader struct {
33+
reader commitlog.Reader
34+
idFilter *string
35+
idSizeFilter *int
36+
}
37+
38+
func newFilteringReader(path string, idFilter *string, idSizeFilter *int) (*filteringReader, error) {
39+
opts := commitlog.NewReaderOptions(commitlog.NewOptions(), false)
40+
reader := commitlog.NewReader(opts)
41+
if _, err := reader.Open(path); err != nil {
42+
return nil, err
43+
}
44+
return &filteringReader{reader: reader, idFilter: idFilter, idSizeFilter: idSizeFilter}, nil
45+
}
46+
47+
func (c *filteringReader) Read() (commitlog.LogEntry, bool, error) {
48+
for {
49+
entry, err := c.reader.Read()
50+
if errors.Is(err, io.EOF) {
51+
break
52+
}
53+
if err != nil {
54+
return commitlog.LogEntry{}, false, err
55+
}
56+
series := entry.Series
57+
if *c.idFilter != "" && !strings.Contains(series.ID.String(), *c.idFilter) {
58+
continue
59+
}
60+
if *c.idSizeFilter != 0 && len(series.ID.Bytes()) < *c.idSizeFilter {
61+
continue
62+
}
63+
return entry, true, nil
64+
}
65+
return commitlog.LogEntry{}, false, nil
66+
}
67+
68+
func (c *filteringReader) Close() {
69+
if c != nil && c.reader != nil {
70+
if err := c.reader.Close(); err != nil {
71+
log.Fatalf("unable to close reader: %v", err)
72+
}
73+
}
74+
}

src/cmd/tools/read_commitlog/main/main.go

Lines changed: 111 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -22,24 +22,26 @@ package main
2222

2323
import (
2424
"encoding/base64"
25-
"errors"
2625
"fmt"
27-
"io"
2826
"log"
2927
"os"
30-
"strings"
28+
"sort"
3129
"time"
3230

3331
"github.com/pborman/getopt"
3432
"go.uber.org/zap"
3533

36-
"github.com/m3db/m3/src/dbnode/persist/fs/commitlog"
34+
"github.com/m3db/m3/src/x/ident"
35+
xtime "github.com/m3db/m3/src/x/time"
3736
)
3837

3938
func main() {
4039
var (
41-
path = getopt.StringLong("path", 'p', "", "file path [e.g. /var/lib/m3db/commitlogs/commitlog-0-161023.db]")
42-
idFilter = getopt.StringLong("id-filter", 'f', "", "ID Contains Filter (optional)")
40+
path = getopt.StringLong("path", 'p', "", "file path [e.g. /var/lib/m3db/commitlogs/commitlog-0-161023.db]")
41+
idFilter = getopt.StringLong("id-filter", 'f', "", "ID Contains Filter (optional)")
42+
idSizeFilter = getopt.IntLong("id-size-filter", 's', 0, "ID Size (bytes) Filter (optional)")
43+
mode = getopt.StringLong("mode", 'm', "", "Action [print,summary]. Defaults to 'print'")
44+
top = getopt.IntLong("top", 't', 0, "Print out only top N IDs")
4345
)
4446
getopt.Parse()
4547

@@ -54,34 +56,40 @@ func main() {
5456
os.Exit(1)
5557
}
5658

57-
opts := commitlog.NewReaderOptions(commitlog.NewOptions(), false)
58-
reader := commitlog.NewReader(opts)
59-
60-
_, err = reader.Open(*path)
59+
reader, err := newFilteringReader(*path, idFilter, idSizeFilter)
6160
if err != nil {
6261
logger.Fatalf("unable to open reader: %v", err)
6362
}
63+
defer reader.Close()
64+
65+
switch *mode {
66+
case "summary":
67+
err = printSummary(reader, top)
68+
default:
69+
err = printMetrics(reader)
70+
}
71+
if err != nil {
72+
logger.Fatalf("error while reading commitlog: %v", err)
73+
}
74+
}
6475

76+
func printMetrics(reader *filteringReader) error {
6577
var (
6678
entryCount uint32
6779
annotationSizeTotal uint64
6880
start = time.Now()
6981
)
7082

7183
for {
72-
entry, err := reader.Read()
73-
if errors.Is(err, io.EOF) {
74-
break
75-
}
84+
entry, found, err := reader.Read()
7685
if err != nil {
77-
logger.Fatalf("err reading commitlog: %v", err)
86+
return err
7887
}
79-
80-
series := entry.Series
81-
if *idFilter != "" && !strings.Contains(series.ID.String(), *idFilter) {
82-
continue
88+
if !found {
89+
break
8390
}
8491

92+
series := entry.Series
8593
fmt.Printf("{id: %s, dp: %+v, ns: %s, shard: %d", // nolint: forbidigo
8694
series.ID, entry.Datapoint, entry.Series.Namespace, entry.Series.Shard)
8795
if len(entry.Annotation) > 0 {
@@ -96,11 +104,91 @@ func main() {
96104

97105
runTime := time.Since(start)
98106

99-
if err := reader.Close(); err != nil {
100-
log.Fatalf("unable to close reader: %v", err)
101-
}
102-
103107
fmt.Printf("\nRunning time: %s\n", runTime) // nolint: forbidigo
104108
fmt.Printf("%d entries read\n", entryCount) // nolint: forbidigo
105109
fmt.Printf("Total annotation size: %d bytes\n", annotationSizeTotal) // nolint: forbidigo
110+
return nil
106111
}
112+
113+
func printSummary(reader *filteringReader, top *int) error {
114+
var (
115+
entryCount uint32
116+
start = time.Now()
117+
datapointCount = map[ident.ID]uint32{}
118+
totalIDSize uint64
119+
earliestDatapoint xtime.UnixNano
120+
oldestDatapoint xtime.UnixNano
121+
)
122+
123+
for {
124+
entry, found, err := reader.Read()
125+
if err != nil {
126+
return err
127+
}
128+
if !found {
129+
break
130+
}
131+
dp := entry.Datapoint
132+
133+
if earliestDatapoint == 0 || earliestDatapoint > dp.TimestampNanos {
134+
earliestDatapoint = dp.TimestampNanos
135+
}
136+
if oldestDatapoint == 0 || oldestDatapoint < dp.TimestampNanos {
137+
oldestDatapoint = dp.TimestampNanos
138+
}
139+
140+
datapointCount[entry.Series.ID]++
141+
142+
entryCount++
143+
}
144+
145+
runTime := time.Since(start)
146+
147+
fmt.Printf("\nRunning time: %s\n", runTime) // nolint: forbidigo
148+
fmt.Printf("%d entries read\n", entryCount) // nolint: forbidigo
149+
fmt.Printf("time range [%s:%s]\n", earliestDatapoint.String(), oldestDatapoint.String()) // nolint: forbidigo
150+
151+
datapointCountArr := idPairs{}
152+
sizeArr := idPairs{}
153+
for ID, count := range datapointCount {
154+
IDSize := len(ID.Bytes())
155+
totalIDSize += uint64(IDSize)
156+
datapointCountArr = append(datapointCountArr, idPair{ID: ID, Value: count})
157+
sizeArr = append(sizeArr, idPair{ID: ID, Value: uint32(IDSize)})
158+
}
159+
160+
sort.Sort(sort.Reverse(datapointCountArr))
161+
sort.Sort(sort.Reverse(sizeArr))
162+
163+
fmt.Printf("total ID size: %d bytes\n", totalIDSize) // nolint: forbidigo
164+
fmt.Printf("total distinct number of IDs %d \n", len(datapointCount)) // nolint: forbidigo
165+
166+
limit := len(datapointCountArr)
167+
if *top > 0 && *top < limit {
168+
limit = *top
169+
}
170+
fmt.Printf("ID datapoint counts: \n") // nolint: forbidigo
171+
for i := 0; i < limit; i++ {
172+
pair := datapointCountArr[i]
173+
fmt.Printf("%-10d %s\n", pair.Value, pair.ID.String()) // nolint: forbidigo
174+
}
175+
176+
fmt.Printf("ID sizes(bytes): \n") // nolint: forbidigo
177+
for i := 0; i < limit; i++ {
178+
pair := sizeArr[i]
179+
fmt.Printf("%-10d %s\n", pair.Value, pair.ID.String()) // nolint: forbidigo
180+
}
181+
182+
return nil
183+
}
184+
185+
type idPair struct {
186+
ID ident.ID
187+
Value uint32
188+
}
189+
190+
type idPairs []idPair
191+
192+
func (p idPairs) Len() int { return len(p) }
193+
func (p idPairs) Swap(i, j int) { p[i], p[j] = p[j], p[i] }
194+
func (p idPairs) Less(i, j int) bool { return p[i].Value < p[j].Value }

0 commit comments

Comments
 (0)