@@ -10,6 +10,7 @@ import (
10
10
"github.com/prometheus/prometheus/model/exemplar"
11
11
"github.com/prometheus/prometheus/model/labels"
12
12
"github.com/prometheus/prometheus/tsdb/chunks"
13
+ "go.uber.org/atomic"
13
14
)
14
15
15
16
// memSeries is a chunkless version of tsdb.memSeries.
@@ -80,11 +81,12 @@ func (m seriesHashmap) Delete(hash uint64, ref chunks.HeadSeriesRef) {
80
81
// Filling the padded space with the maps was profiled to be slower -
81
82
// likely due to the additional pointer dereferences.
82
83
type stripeSeries struct {
83
- size int
84
- series []map [chunks.HeadSeriesRef ]* memSeries
85
- hashes []seriesHashmap
86
- exemplars []map [chunks.HeadSeriesRef ]* exemplar.Exemplar
87
- locks []stripeLock
84
+ size int
85
+ series []map [chunks.HeadSeriesRef ]* memSeries
86
+ hashes []seriesHashmap
87
+ exemplars []map [chunks.HeadSeriesRef ]* exemplar.Exemplar
88
+ locks []stripeLock
89
+ initialized * atomic.Bool
88
90
89
91
gcMut sync.Mutex
90
92
}
@@ -95,13 +97,18 @@ type stripeLock struct {
95
97
_ [40 ]byte
96
98
}
97
99
100
+ // newStripeSeries creates a new stripeSeries with the given stripe size in an uninitialized state.
101
+ // When in an uninitialized state, reads and writes are not lock protected. After loading any
102
+ // initial data, MarkInitialized() must be called before the stripeSeries is used, because
103
+ // stripeSeries.gc() returns nil without collecting anything until then.
98
104
func newStripeSeries (stripeSize int ) * stripeSeries {
99
105
s := & stripeSeries {
100
- size : stripeSize ,
101
- series : make ([]map [chunks.HeadSeriesRef ]* memSeries , stripeSize ),
102
- hashes : make ([]seriesHashmap , stripeSize ),
103
- exemplars : make ([]map [chunks.HeadSeriesRef ]* exemplar.Exemplar , stripeSize ),
104
- locks : make ([]stripeLock , stripeSize ),
106
+ size : stripeSize ,
107
+ series : make ([]map [chunks.HeadSeriesRef ]* memSeries , stripeSize ),
108
+ hashes : make ([]seriesHashmap , stripeSize ),
109
+ exemplars : make ([]map [chunks.HeadSeriesRef ]* exemplar.Exemplar , stripeSize ),
110
+ locks : make ([]stripeLock , stripeSize ),
111
+ initialized : atomic .NewBool (false ),
105
112
}
106
113
for i := range s .series {
107
114
s .series [i ] = map [chunks.HeadSeriesRef ]* memSeries {}
@@ -115,9 +122,63 @@ func newStripeSeries(stripeSize int) *stripeSeries {
115
122
return s
116
123
}
117
124
125
+ // MarkInitialized marks the stripeSeries initialized, allowing usage of stripeSeries.gc(). Returns
126
+ // true if the stripeSeries was not initialized before, false otherwise.
127
+ func (s * stripeSeries ) MarkInitialized () bool {
128
+ return s .initialized .CompareAndSwap (false , true )
129
+ }
130
+
131
+ // RemoveInactiveSeries removes all series that have a lastTs of 0 while the stripeSeries is still in
132
+ // an uninitialized state. If the stripeSeries is already initialized, it returns 0 and false. Otherwise,
133
+ // it returns the number of series that were removed and true.
134
+ //
135
+ // The stripeSeries assumes that a chunks.HeadSeriesRef uniquely refers to a series in the stripeSeries.
136
+ // But in practice, a chunks.HeadSeriesRef can remain on a WAL even after it has been removed from the
137
+ // stripeSeries. If the series comes back before the original is removed from the WAL we are left with
138
+ // multiple chunks.HeadSeriesRef for the same series. If the WAL is reloaded in this state, we end up with a
139
+ // series leak. A call to stripeSeries.gc() is only capable of removing one instance of the chunks.HeadSeriesRef
140
+ // as it assumes there can only be one chunks.HeadSeriesRef for a series. The remaining chunks.HeadSeriesRefs are
141
+ // left in the stripeSeries and overtime can accumulate to consume a very large amount of memory.
142
+ func (s * stripeSeries ) RemoveInactiveSeries () (int , bool ) {
143
+ if s .initialized .Load () {
144
+ return 0 , false
145
+ }
146
+
147
+ inactiveSeries := 0
148
+ // Start with hashes first because it's easier to get to a series from the hash than a hash from a series.
149
+ for _ , hashSeries := range s .hashes {
150
+ for hash , seriesForHash := range hashSeries {
151
+ for _ , series := range seriesForHash {
152
+ if series .lastTs == 0 {
153
+ hashSeries .Delete (hash , series .ref )
154
+ inactiveSeries ++
155
+
156
+ // Get the seriesRef lock to delete the series from s.series.
157
+ refLock := s .refLock (series .ref )
158
+ delete (s .series [refLock ], series .ref )
159
+ }
160
+ }
161
+ }
162
+ }
163
+
164
+ for _ , seriesRefs := range s .series {
165
+ for head , series := range seriesRefs {
166
+ if series .lastTs == 0 {
167
+ delete (s .series [inactiveSeries ], head )
168
+ inactiveSeries ++
169
+ }
170
+ }
171
+ }
172
+
173
+ return inactiveSeries , true
174
+ }
175
+
118
176
// gc garbage collects old chunks that are strictly before mint and removes
119
177
// series entirely that have no chunks left.
120
178
func (s * stripeSeries ) gc (mint int64 ) map [chunks.HeadSeriesRef ]struct {} {
179
+ if ! s .initialized .Load () {
180
+ return nil
181
+ }
121
182
// NOTE(rfratto): GC will grab two locks, one for the hash and the other for
122
183
// series. It's not valid for any other function to grab both locks,
123
184
// otherwise a deadlock might occur when running GC in parallel with
@@ -141,7 +202,7 @@ func (s *stripeSeries) gc(mint int64) map[chunks.HeadSeriesRef]struct{} {
141
202
142
203
// The series is stale. We need to obtain a second lock for the
143
204
// ref if it's different than the hash lock.
144
- refLock := int (series .ref ) & ( s . size - 1 )
205
+ refLock := int (s . refLock ( series .ref ))
145
206
if hashLock != refLock {
146
207
s .locks [refLock ].Lock ()
147
208
}
@@ -168,14 +229,14 @@ func (s *stripeSeries) gc(mint int64) map[chunks.HeadSeriesRef]struct{} {
168
229
}
169
230
170
231
func (s * stripeSeries ) GetByID (id chunks.HeadSeriesRef ) * memSeries {
171
- refLock := uint64 ( id ) & uint64 ( s . size - 1 )
232
+ refLock := s . refLock ( id )
172
233
s .locks [refLock ].RLock ()
173
234
defer s .locks [refLock ].RUnlock ()
174
235
return s.series [refLock ][id ]
175
236
}
176
237
177
238
func (s * stripeSeries ) GetByHash (hash uint64 , lset labels.Labels ) * memSeries {
178
- hashLock := hash & uint64 ( s . size - 1 )
239
+ hashLock := s . hashLock ( hash )
179
240
180
241
s .locks [hashLock ].RLock ()
181
242
defer s .locks [hashLock ].RUnlock ()
@@ -184,8 +245,8 @@ func (s *stripeSeries) GetByHash(hash uint64, lset labels.Labels) *memSeries {
184
245
185
246
func (s * stripeSeries ) Set (hash uint64 , series * memSeries ) {
186
247
var (
187
- hashLock = hash & uint64 ( s . size - 1 )
188
- refLock = uint64 (series .ref ) & uint64 ( s . size - 1 )
248
+ hashLock = s . hashLock ( hash )
249
+ refLock = s . refLock (series .ref )
189
250
)
190
251
191
252
// We can't hold both locks at once otherwise we might deadlock with a
@@ -203,7 +264,7 @@ func (s *stripeSeries) Set(hash uint64, series *memSeries) {
203
264
}
204
265
205
266
func (s * stripeSeries ) GetLatestExemplar (ref chunks.HeadSeriesRef ) * exemplar.Exemplar {
206
- i := uint64 ( ref ) & uint64 ( s . size - 1 )
267
+ i := s . refLock ( ref )
207
268
208
269
s .locks [i ].RLock ()
209
270
exemplar := s.exemplars [i ][ref ]
@@ -213,7 +274,7 @@ func (s *stripeSeries) GetLatestExemplar(ref chunks.HeadSeriesRef) *exemplar.Exe
213
274
}
214
275
215
276
func (s * stripeSeries ) SetLatestExemplar (ref chunks.HeadSeriesRef , exemplar * exemplar.Exemplar ) {
216
- i := uint64 ( ref ) & uint64 ( s . size - 1 )
277
+ i := s . refLock ( ref )
217
278
218
279
// Make sure that's a valid series id and record its latest exemplar
219
280
s .locks [i ].Lock ()
@@ -227,6 +288,14 @@ func (s *stripeSeries) iterator() *stripeSeriesIterator {
227
288
return & stripeSeriesIterator {s }
228
289
}
229
290
291
+ func (s * stripeSeries ) hashLock (hash uint64 ) uint64 {
292
+ return hash & uint64 (s .size - 1 )
293
+ }
294
+
295
+ func (s * stripeSeries ) refLock (ref chunks.HeadSeriesRef ) uint64 {
296
+ return uint64 (ref ) & uint64 (s .size - 1 )
297
+ }
298
+
230
299
// stripeSeriesIterator allows to iterate over series through a channel.
231
300
// The channel should always be completely consumed to not leak.
232
301
type stripeSeriesIterator struct {
@@ -243,7 +312,7 @@ func (it *stripeSeriesIterator) Channel() <-chan *memSeries {
243
312
for _ , series := range it .s .series [i ] {
244
313
series .Lock ()
245
314
246
- j := int (series .lset .Hash ()) & ( it . s . size - 1 )
315
+ j := int (it . s . hashLock ( series .lset .Hash ()))
247
316
if i != j {
248
317
it .s .locks [j ].RLock ()
249
318
}
0 commit comments