@@ -120,78 +120,99 @@ func (b *pageBits) popcntRange(i, n uint) (s uint) {
120120// sake of documentation, 0s are free pages and 1s are allocated pages.
121121type pallocBits pageBits
122122
123- // consec8tab is a table containing the number of consecutive
124- // zero bits for any uint8 value.
125- //
126- // The table is generated by calling consec8(i) for each
127- // possible uint8 value, which is defined as:
128- //
129- // // consec8 counts the maximum number of consecutive 0 bits
130- // // in a uint8.
131- // func consec8(n uint8) int {
132- // n = ^n
133- // i := 0
134- // for n != 0 {
135- // n &= (n << 1)
136- // i++
137- // }
138- // return i
139- // }
140- var consec8tab = [256 ]uint {
141- 8 , 7 , 6 , 6 , 5 , 5 , 5 , 5 , 4 , 4 , 4 , 4 , 4 , 4 , 4 , 4 ,
142- 4 , 3 , 3 , 3 , 3 , 3 , 3 , 3 , 3 , 3 , 3 , 3 , 3 , 3 , 3 , 3 ,
143- 5 , 4 , 3 , 3 , 2 , 2 , 2 , 2 , 3 , 2 , 2 , 2 , 2 , 2 , 2 , 2 ,
144- 4 , 3 , 2 , 2 , 2 , 2 , 2 , 2 , 3 , 2 , 2 , 2 , 2 , 2 , 2 , 2 ,
145- 6 , 5 , 4 , 4 , 3 , 3 , 3 , 3 , 3 , 2 , 2 , 2 , 2 , 2 , 2 , 2 ,
146- 4 , 3 , 2 , 2 , 2 , 1 , 1 , 1 , 3 , 2 , 1 , 1 , 2 , 1 , 1 , 1 ,
147- 5 , 4 , 3 , 3 , 2 , 2 , 2 , 2 , 3 , 2 , 1 , 1 , 2 , 1 , 1 , 1 ,
148- 4 , 3 , 2 , 2 , 2 , 1 , 1 , 1 , 3 , 2 , 1 , 1 , 2 , 1 , 1 , 1 ,
149- 7 , 6 , 5 , 5 , 4 , 4 , 4 , 4 , 3 , 3 , 3 , 3 , 3 , 3 , 3 , 3 ,
150- 4 , 3 , 2 , 2 , 2 , 2 , 2 , 2 , 3 , 2 , 2 , 2 , 2 , 2 , 2 , 2 ,
151- 5 , 4 , 3 , 3 , 2 , 2 , 2 , 2 , 3 , 2 , 1 , 1 , 2 , 1 , 1 , 1 ,
152- 4 , 3 , 2 , 2 , 2 , 1 , 1 , 1 , 3 , 2 , 1 , 1 , 2 , 1 , 1 , 1 ,
153- 6 , 5 , 4 , 4 , 3 , 3 , 3 , 3 , 3 , 2 , 2 , 2 , 2 , 2 , 2 , 2 ,
154- 4 , 3 , 2 , 2 , 2 , 1 , 1 , 1 , 3 , 2 , 1 , 1 , 2 , 1 , 1 , 1 ,
155- 5 , 4 , 3 , 3 , 2 , 2 , 2 , 2 , 3 , 2 , 1 , 1 , 2 , 1 , 1 , 1 ,
156- 4 , 3 , 2 , 2 , 2 , 1 , 1 , 1 , 3 , 2 , 1 , 1 , 2 , 1 , 1 , 0 ,
157- }
158-
159123// summarize returns a packed summary of the bitmap in pallocBits.
160124func (b * pallocBits ) summarize () pallocSum {
161- // TODO(mknyszek): There may be something more clever to be done
162- // here to make the summarize operation more efficient. For example,
163- // we can compute start and end with 64-bit wide operations easily,
164- // but max is a bit more complex. Perhaps there exists some way to
165- // leverage the 64-bit start and end to our advantage?
166- var start , max , end uint
125+ var start , max , cur uint
126+ const notSetYet = ^ uint (0 ) // sentinel for start value
127+ start = notSetYet
167128 for i := 0 ; i < len (b ); i ++ {
168- a := b [i ]
169- for j := 0 ; j < 64 ; j += 8 {
170- k := uint8 (a >> j )
171-
172- // Compute start.
173- si := uint (sys .TrailingZeros8 (k ))
174- if start == uint (i * 64 + j ) {
175- start += si
176- }
129+ x := b [i ]
130+ if x == 0 {
131+ cur += 64
132+ continue
133+ }
134+ t := uint (sys .TrailingZeros64 (x ))
135+ l := uint (sys .LeadingZeros64 (x ))
177136
178- // Compute max.
179- if end + si > max {
180- max = end + si
181- }
182- if mi := consec8tab [k ]; mi > max {
183- max = mi
137+ // Finish any region spanning the uint64s
138+ cur += t
139+ if start == notSetYet {
140+ start = cur
141+ }
142+ if cur > max {
143+ max = cur
144+ }
145+ // Final region that might span to next uint64
146+ cur = l
147+ }
148+ if start == notSetYet {
149+ // Made it all the way through without finding a single 1 bit.
150+ const n = uint (64 * len (b ))
151+ return packPallocSum (n , n , n )
152+ }
153+ if cur > max {
154+ max = cur
155+ }
156+ if max >= 64 - 2 {
157+ // There is no way an internal run of zeros could beat max.
158+ return packPallocSum (start , max , cur )
159+ }
160+ // Now look inside each uint64 for runs of zeros.
161+ // All uint64s must be nonzero, or we would have aborted above.
162+ outer:
163+ for i := 0 ; i < len (b ); i ++ {
164+ x := b [i ]
165+
166+ // Look inside this uint64. We have a pattern like
167+ // 000000 1xxxxx1 000000
168+ // We need to look inside the 1xxxxx1 for any contiguous
169+ // region of zeros.
170+
171+ // We already know the trailing zeros are no larger than max. Remove them.
172+ x >>= sys .TrailingZeros64 (x ) & 63
173+ if x & (x + 1 ) == 0 { // no more zeros (except at the top).
174+ continue
175+ }
176+
177+ // Strategy: shrink all runs of zeros by max. If any runs of zero
178+ // remain, then we've identified a larger maximum zero run.
179+ p := max // number of zeros we still need to shrink by.
180+ k := uint (1 ) // current minimum length of runs of ones in x.
181+ for {
182+ // Shrink all runs of zeros by p places (except the top zeros).
183+ for p > 0 {
184+ if p <= k {
185+ // Shift p ones down into the top of each run of zeros.
186+ x |= x >> (p & 63 )
187+ if x & (x + 1 ) == 0 { // no more zeros (except at the top).
188+ continue outer
189+ }
190+ break
191+ }
192+ // Shift k ones down into the top of each run of zeros.
193+ x |= x >> (k & 63 )
194+ if x & (x + 1 ) == 0 { // no more zeros (except at the top).
195+ continue outer
196+ }
197+ p -= k
198+ // We've just doubled the minimum length of 1-runs.
199+ // This allows us to shift farther in the next iteration.
200+ k *= 2
184201 }
185202
186- // Compute end.
187- if k == 0 {
188- end += 8
189- } else {
190- end = uint (sys .LeadingZeros8 (k ))
203+ // The length of the lowest-order zero run is an increment to our maximum.
204+ j := uint (sys .TrailingZeros64 (^ x )) // count contiguous trailing ones
205+ x >>= j & 63 // remove trailing ones
206+ j = uint (sys .TrailingZeros64 (x )) // count contiguous trailing zeros
207+ x >>= j & 63 // remove zeros
208+ max += j // we have a new maximum!
209+ if x & (x + 1 ) == 0 { // no more zeros (except at the top).
210+ continue outer
191211 }
212+ p = j // remove j more zeros from each zero run.
192213 }
193214 }
194- return packPallocSum (start , max , end )
215+ return packPallocSum (start , max , cur )
195216}
196217
197218// find searches for npages contiguous free pages in pallocBits and returns
0 commit comments