@@ -15,7 +15,6 @@ import (
 
 	"github.com/go-kit/log"
 	"github.com/go-kit/log/level"
-	"github.com/grafana/alloy/internal/util"
 	"github.com/prometheus/client_golang/prometheus"
 	"github.com/prometheus/prometheus/model/exemplar"
 	"github.com/prometheus/prometheus/model/histogram"
@@ -29,6 +28,8 @@ import (
 	"github.com/prometheus/prometheus/tsdb/record"
 	"github.com/prometheus/prometheus/tsdb/wlog"
 	"go.uber.org/atomic"
+
+	"github.com/grafana/alloy/internal/util"
 )
 
 // ErrWALClosed is an error returned when a WAL operation can't run because the
@@ -210,14 +211,19 @@ func (w *Storage) replayWAL() error {
 	}
 
 	level.Info(w.logger).Log("msg", "replaying WAL, this may take a while", "dir", w.wal.Dir())
-	dir, startFrom, err := wlog.LastCheckpoint(w.wal.Dir())
-	if err != nil && err != record.ErrNotFound {
-		return fmt.Errorf("find last checkpoint: %w", err)
+	dir, startFrom, cpErr := wlog.LastCheckpoint(w.wal.Dir())
+	if cpErr != nil && !errors.Is(cpErr, record.ErrNotFound) {
+		return fmt.Errorf("find last checkpoint: %w", cpErr)
 	}
 
-	multiRef := map[chunks.HeadSeriesRef]chunks.HeadSeriesRef{}
+	// Find the last segment.
+	_, lastSegment, err := wlog.Segments(w.wal.Dir())
+	if err != nil {
+		return fmt.Errorf("finding WAL segments: %w", err)
+	}
 
-	if err == nil {
+	multiRef := map[chunks.HeadSeriesRef]chunks.HeadSeriesRef{}
+	if cpErr == nil {
 		sr, err := wlog.NewSegmentsReader(dir)
 		if err != nil {
 			return fmt.Errorf("open checkpoint: %w", err)
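Aside: renaming the checkpoint error to cpErr and switching from a direct comparison to errors.Is also changes behavior if the sentinel ever comes back wrapped, since errors.Is walks the wrap chain while == does not. A minimal standalone illustration, using a local sentinel rather than the real record.ErrNotFound:

```go
package main

import (
	"errors"
	"fmt"
)

// errNotFound stands in for a sentinel error such as record.ErrNotFound.
var errNotFound = errors.New("not found")

func main() {
	// A callee may wrap the sentinel with extra context.
	wrapped := fmt.Errorf("find last checkpoint: %w", errNotFound)

	fmt.Println(wrapped == errNotFound)          // false: direct comparison misses wrapped errors
	fmt.Println(errors.Is(wrapped, errNotFound)) // true: errors.Is unwraps the chain
}
```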
@@ -230,41 +236,35 @@ func (w *Storage) replayWAL() error {
 
 		// A corrupted checkpoint is a hard error for now and requires user
 		// intervention. There's likely little data that can be recovered anyway.
-		if err := w.loadWAL(wlog.NewReader(sr), multiRef); err != nil {
+		if err := w.loadWAL(wlog.NewReader(sr), multiRef, lastSegment); err != nil {
 			return fmt.Errorf("backfill checkpoint: %w", err)
 		}
 		startFrom++
 		level.Info(w.logger).Log("msg", "WAL checkpoint loaded")
 	}
 
-	// Find the last segment.
-	_, last, err := wlog.Segments(w.wal.Dir())
-	if err != nil {
-		return fmt.Errorf("finding WAL segments: %w", err)
-	}
-
 	// Backfill segments from the most recent checkpoint onwards.
-	for i := startFrom; i <= last; i++ {
+	for i := startFrom; i <= lastSegment; i++ {
 		s, err := wlog.OpenReadSegment(wlog.SegmentName(w.wal.Dir(), i))
 		if err != nil {
 			return fmt.Errorf("open WAL segment %d: %w", i, err)
 		}
 
 		sr := wlog.NewSegmentBufReader(s)
-		err = w.loadWAL(wlog.NewReader(sr), multiRef)
+		err = w.loadWAL(wlog.NewReader(sr), multiRef, lastSegment)
 		if err := sr.Close(); err != nil {
 			level.Warn(w.logger).Log("msg", "error while closing the wal segments reader", "err", err)
 		}
 		if err != nil {
 			return err
 		}
-		level.Info(w.logger).Log("msg", "WAL segment loaded", "segment", i, "maxSegment", last)
+		level.Info(w.logger).Log("msg", "WAL segment loaded", "segment", i, "maxSegment", lastSegment)
 	}
 
 	return nil
 }
 
-func (w *Storage) loadWAL(r *wlog.Reader, multiRef map[chunks.HeadSeriesRef]chunks.HeadSeriesRef) (err error) {
+func (w *Storage) loadWAL(r *wlog.Reader, multiRef map[chunks.HeadSeriesRef]chunks.HeadSeriesRef, lastSegment int) (err error) {
 	var (
 		dec     record.Decoder
 		lastRef = chunks.HeadSeriesRef(w.nextRef.Load())
@@ -367,21 +367,20 @@ func (w *Storage) loadWAL(r *wlog.Reader, multiRef map[chunks.HeadSeriesRef]chun
 		switch v := d.(type) {
 		case []record.RefSeries:
 			for _, s := range v {
-				// If this is a new series, create it in memory without a timestamp.
-				// If we read in a sample for it, we'll use the timestamp of the latest
-				// sample. Otherwise, the series is stale and will be deleted once
-				// the truncation is performed.
-				if w.series.GetByID(s.Ref) == nil {
-					series := &memSeries{ref: s.Ref, lset: s.Labels, lastTs: 0}
-					w.series.Set(s.Labels.Hash(), series)
-					multiRef[s.Ref] = series.ref
+				// Make sure we don't try to reuse a Ref that already exists in the WAL.
+				if s.Ref > lastRef {
+					lastRef = s.Ref
+				}
 
+				series := &memSeries{ref: s.Ref, lset: s.Labels, lastTs: 0}
+				series, created := w.series.GetOrSet(s.Labels.Hash(), s.Labels, series)
+				if !created {
+					multiRef[s.Ref] = series.ref
+					// Keep the duplicate series in the checkpoint until the latest segment.
+					w.deleted[series.ref] = lastSegment
+				} else {
 					w.metrics.numActiveSeries.Inc()
 					w.metrics.totalCreatedSeries.Inc()
-
-					if s.Ref > lastRef {
-						lastRef = s.Ref
-					}
 				}
 			}
 
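The behavioral change in this last hunk hinges on the series store's GetOrSet returning the previously registered series (with created == false) when a series with the same labels was already replayed, for example from both a checkpoint and a later segment; the duplicate ref is then aliased via multiRef and retained in w.deleted until lastSegment. The real stripeSeries implementation is not part of this diff; the following is a minimal, hypothetical sketch of the assumed contract only, ignoring locking and label-hash collisions:

```go
package main

import "fmt"

// memSeries is a stripped-down stand-in for the storage's real memSeries type.
type memSeries struct {
	ref    uint64
	labels string
}

// seriesStore is a hypothetical, simplified stand-in for the series map used
// by the WAL storage. It only illustrates the GetOrSet contract the new replay
// code relies on: return the existing entry with created == false when a series
// with the same label hash is already present, otherwise store the candidate
// and report created == true.
type seriesStore struct {
	byHash map[uint64]*memSeries
}

func (s *seriesStore) GetOrSet(hash uint64, series *memSeries) (*memSeries, bool) {
	if existing, ok := s.byHash[hash]; ok {
		return existing, false
	}
	s.byHash[hash] = series
	return series, true
}

func main() {
	store := &seriesStore{byHash: map[uint64]*memSeries{}}

	// First record for a label set: the series is created and counted as active.
	first, created := store.GetOrSet(42, &memSeries{ref: 1, labels: `{job="a"}`})
	fmt.Println(first.ref, created) // 1 true

	// A later record for the same labels (e.g. replayed from a newer segment)
	// resolves to the original series, so the caller can alias the duplicate
	// ref and schedule it for deletion once the last segment is passed.
	dup, created := store.GetOrSet(42, &memSeries{ref: 7, labels: `{job="a"}`})
	fmt.Println(dup.ref, created) // 1 false
}
```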