
Commit 0bd8f06

fix: healing to list, purge dangling objects (minio#621)
In a specific corner case, when a dangling object has only a single shard left over, healing ends up unable to list that object in order to purge it: the listing logic consulted only `len(disks)/2+1` drives, and the drive actually holding the shard may not be part of that expected-disks list, so the object is never listed and is ignored in perpetuity. Change the logic so that HealObjects() can listAndHeal() per set across all of its drives, since there is really no other clean way to do this. However, instead of listing on all erasure sets simultaneously, we list on 3 at a time, so in a large enough cluster the load is fairly staggered.
1 parent 6640be3 · commit 0bd8f06
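
To make the corner case concrete, here is a minimal, self-contained Go sketch (toy drive names, not minio's actual types) of how slicing the drive list down to `len(disks)/2+1` can exclude the one drive still holding the lone shard of a dangling object:

package main

import "fmt"

func main() {
	// Toy illustration: 8 drives in a set; the only remaining shard of a
	// dangling object sits on drive "d5".
	disks := []string{"d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7"}
	danglingShardOn := "d5"

	// The old logic: list only the first len(disks)/2+1 drives and keep the
	// rest as fallbacks, consulted only when a primary drive errors out.
	expectedDisks := len(disks)/2 + 1 // 5
	listed := disks[:expectedDisks]   // d0..d4: the drives actually listed
	fallback := disks[expectedDisks:] // d5..d7: fallbacks only

	visible := false
	for _, d := range listed {
		if d == danglingShardOn {
			visible = true
		}
	}
	fmt.Println("listed:", listed)
	fmt.Println("fallback:", fallback)
	// The healthy primaries never error, so the fallbacks are never consulted
	// and the dangling shard is never listed, hence never purged.
	fmt.Println("dangling shard visible to listing:", visible) // false
}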

File tree

3 files changed: +8 −17 lines changed

cmd/erasure-healing.go
cmd/erasure-server-pool.go
cmd/global-heal.go

cmd/erasure-healing.go

Lines changed: 0 additions & 5 deletions
@@ -55,10 +55,6 @@ func (er erasureObjects) listAndHeal(ctx context.Context, bucket, prefix string,
 		return errors.New("listAndHeal: No non-healing drives found")
 	}
 
-	expectedDisks := len(disks)/2 + 1
-	fallbackDisks := disks[expectedDisks:]
-	disks = disks[:expectedDisks]
-
 	// How to resolve partial results.
 	resolver := metadataResolutionParams{
 		dirQuorum: 1,
@@ -75,7 +71,6 @@ func (er erasureObjects) listAndHeal(ctx context.Context, bucket, prefix string,
 
 	lopts := listPathRawOptions{
 		disks:         disks,
-		fallbackDisks: fallbackDisks,
 		bucket:        bucket,
 		path:          path,
 		filterPrefix:  filterPrefix,
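
For context, the resolver kept in this hunk uses `dirQuorum: 1`, meaning one drive reporting an entry is enough to surface it in the merged listing; once every drive is listed, even a lone leftover shard becomes visible. A hypothetical stand-in for that merge rule (`mergeListing` is illustrative, not minio's internal resolver):

package main

import (
	"fmt"
	"sort"
)

// mergeListing keeps an object name if at least quorum drives report it.
// With quorum = 1, a shard present on any single listed drive is enough
// to surface the object.
func mergeListing(perDrive [][]string, quorum int) []string {
	counts := map[string]int{}
	for _, entries := range perDrive {
		for _, name := range entries {
			counts[name]++
		}
	}
	var out []string
	for name, n := range counts {
		if n >= quorum {
			out = append(out, name)
		}
	}
	sort.Strings(out)
	return out
}

func main() {
	// Drive 5 holds the only shard of "dangling.obj"; it is surfaced only
	// if drive 5 is actually part of the listing.
	perDrive := [][]string{
		{"a.obj"}, {"a.obj"}, {"a.obj"}, {"a.obj"}, {"a.obj"},
		{"a.obj", "dangling.obj"},
	}
	fmt.Println(mergeListing(perDrive, 1)) // [a.obj dangling.obj]
}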

cmd/erasure-server-pool.go

Lines changed: 7 additions & 6 deletions
@@ -45,6 +45,7 @@ import (
 	"github.com/minio/minio/internal/logger"
 	"github.com/minio/pkg/v3/sync/errgroup"
 	"github.com/minio/pkg/v3/wildcard"
+	"github.com/minio/pkg/v3/workers"
 	"github.com/puzpuzpuz/xsync/v3"
 )
 
@@ -2467,7 +2468,7 @@ func (z *erasureServerPools) HealObjects(ctx context.Context, bucket, prefix str
 	ctx, cancel := context.WithCancel(ctx)
 	defer cancel()
 
-	var poolErrs [][]error
+	poolErrs := make([][]error, len(z.serverPools))
 	for idx, erasureSet := range z.serverPools {
 		if opts.Pool != nil && *opts.Pool != idx {
 			continue
@@ -2476,20 +2477,20 @@ func (z *erasureServerPools) HealObjects(ctx context.Context, bucket, prefix str
 			continue
 		}
 		errs := make([]error, len(erasureSet.sets))
-		var wg sync.WaitGroup
+		wk, _ := workers.New(3)
 		for idx, set := range erasureSet.sets {
 			if opts.Set != nil && *opts.Set != idx {
 				continue
 			}
-			wg.Add(1)
+			wk.Take()
 			go func(idx int, set *erasureObjects) {
-				defer wg.Done()
+				defer wk.Give()
 
 				errs[idx] = set.listAndHeal(ctx, bucket, prefix, opts.Recursive, opts.ScanMode, healEntry)
 			}(idx, set)
 		}
-		wg.Wait()
-		poolErrs = append(poolErrs, errs)
+		wk.Wait()
+		poolErrs[idx] = errs
 	}
 	for _, errs := range poolErrs {
 		for _, err := range errs {
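
The `workers` pool introduced above is what bounds the concurrency to 3 erasure sets at a time: `Take()` blocks while 3 goroutines are already in flight, `Give()` releases a slot, and `Wait()` blocks until all submitted work is done. A minimal sketch of the same pattern, with placeholder jobs standing in for the `listAndHeal()` calls:

package main

import (
	"fmt"
	"time"

	"github.com/minio/pkg/v3/workers"
)

func main() {
	// At most 3 jobs run concurrently, mirroring "list on 3 erasure sets
	// at a time" from the commit message.
	wk, err := workers.New(3)
	if err != nil {
		panic(err)
	}

	results := make([]string, 8)
	for i := 0; i < 8; i++ {
		wk.Take() // blocks while 3 jobs are already in flight
		go func(i int) {
			defer wk.Give() // release the slot when this job finishes

			time.Sleep(100 * time.Millisecond) // placeholder for listAndHeal()
			results[i] = fmt.Sprintf("set-%d healed", i)
		}(i)
	}
	wk.Wait() // block until every submitted job has called Give()

	fmt.Println(results)
}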

cmd/global-heal.go

Lines changed: 1 addition & 6 deletions
@@ -352,10 +352,6 @@ func (er *erasureObjects) healErasureSet(ctx context.Context, buckets []string,
 		disks[i], disks[j] = disks[j], disks[i]
 	})
 
-	expectedDisks := len(disks)/2 + 1
-	fallbackDisks := disks[expectedDisks:]
-	disks = disks[:expectedDisks]
-
 	filterLifecycle := func(bucket, object string, fi FileInfo) bool {
 		if lc == nil {
 			return false
@@ -518,7 +514,6 @@ func (er *erasureObjects) healErasureSet(ctx context.Context, buckets []string,
 
 	err = listPathRaw(ctx, listPathRawOptions{
 		disks:         disks,
-		fallbackDisks: fallbackDisks,
 		bucket:        bucket,
 		recursive:     true,
 		forwardTo:     forwardTo,
@@ -540,7 +535,7 @@ func (er *erasureObjects) healErasureSet(ctx context.Context, buckets []string,
 		},
 		finished: func(errs []error) {
 			success := countErrs(errs, nil)
-			if success < expectedDisks {
+			if success < len(disks)/2+1 {
 				retErr = fmt.Errorf("one or more errors reported during listing: %v", errors.Join(errs...))
 			}
 		},
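
With `expectedDisks` removed, the `finished` callback now computes the listing quorum inline: listing runs on every drive, but it is only treated as failed if fewer than `len(disks)/2+1` of the per-drive listings succeeded. A stand-in sketch of that check (`countSuccess` is a hypothetical rendering of minio's internal `countErrs(errs, nil)`):

package main

import (
	"errors"
	"fmt"
)

// countSuccess mimics countErrs(errs, nil): it counts how many per-drive
// listings finished without error.
func countSuccess(errs []error) int {
	n := 0
	for _, err := range errs {
		if err == nil {
			n++
		}
	}
	return n
}

func main() {
	// One entry per drive; two of the eight listings failed.
	errs := []error{
		nil, nil, nil, errors.New("drive offline"),
		nil, nil, errors.New("timeout"), nil,
	}

	// 6 successes >= 8/2+1 = 5, so the listing is still considered good.
	if success := countSuccess(errs); success < len(errs)/2+1 {
		fmt.Println("listing failed:", errors.Join(errs...))
	} else {
		fmt.Println("listing reached quorum:", success, "of", len(errs))
	}
}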
