Skip to content

Commit a4e3ad3

Browse files
committed
Merge pull request cockroachdb#6361 from vivekmenezes/vivek/checkpoint
sql: simplify schema change backfill to not care for table versions
2 parents 76c8dd5 + aa31e07 commit a4e3ad3

File tree

4 files changed

+81
-82
lines changed

4 files changed

+81
-82
lines changed

sql/backfill.go

Lines changed: 29 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -89,46 +89,8 @@ func convertBackfillError(
8989
return convertBatchError(tableDesc, *b, pErr)
9090
}
9191

92-
var descriptorChangedVersionError = roachpb.NewErrorf("table descriptor has changed version")
93-
94-
// getTableDescAtVersion attempts to read a descriptor from the database at a
95-
// specific version. It returns descriptorChangedVersionError if the current
96-
// version is not the passed in version.
97-
func getTableDescAtVersion(
98-
txn *client.Txn, id ID, version DescriptorVersion,
99-
) (*TableDescriptor, *roachpb.Error) {
100-
tableDesc, pErr := getTableDescFromID(txn, id)
101-
if pErr != nil {
102-
return nil, pErr
103-
}
104-
if tableDesc == nil {
105-
return nil, roachpb.NewError(&roachpb.DescriptorDeletedError{})
106-
}
107-
if version != tableDesc.Version {
108-
return nil, descriptorChangedVersionError
109-
}
110-
return tableDesc, nil
111-
}
112-
113-
// runBackfill runs the backfill for the schema changer. It runs the entire
114-
// backfill at a specific version of the table descriptor, and re-attempts to
115-
// run the schema change when the version changes.
92+
// runBackfill runs the backfill for the schema changer.
11693
func (sc *SchemaChanger) runBackfill(lease *TableDescriptor_SchemaChangeLease) *roachpb.Error {
117-
for {
118-
pErr := sc.runBackfillAtLatestVersion(lease)
119-
if pErr == descriptorChangedVersionError {
120-
continue
121-
}
122-
return pErr
123-
}
124-
}
125-
126-
// Run the backfill at the latest table descriptor version. It returns
127-
// descriptorChangedVersionError when the table descriptor version changes
128-
// while it is running the backfill.
129-
func (sc *SchemaChanger) runBackfillAtLatestVersion(
130-
lease *TableDescriptor_SchemaChangeLease,
131-
) *roachpb.Error {
13294
l, pErr := sc.ExtendLease(*lease)
13395
if pErr != nil {
13496
return pErr
@@ -141,19 +103,12 @@ func (sc *SchemaChanger) runBackfillAtLatestVersion(
141103
var droppedIndexDescs []IndexDescriptor
142104
var addedColumnDescs []ColumnDescriptor
143105
var addedIndexDescs []IndexDescriptor
144-
// Remember the version at the start of the backfill so we can ensure
145-
// later that the table descriptor hasn't changed during the backfill
146-
// process.
147-
var version DescriptorVersion
148106
if pErr := sc.db.Txn(func(txn *client.Txn) *roachpb.Error {
149107
tableDesc, pErr := getTableDescFromID(txn, sc.tableID)
150108
if pErr != nil {
151109
return pErr
152110
}
153-
if tableDesc == nil {
154-
return roachpb.NewError(&roachpb.DescriptorDeletedError{})
155-
}
156-
version = tableDesc.Version
111+
157112
for _, m := range tableDesc.Mutations {
158113
if m.MutationID != sc.mutationID {
159114
break
@@ -187,31 +142,30 @@ func (sc *SchemaChanger) runBackfillAtLatestVersion(
187142

188143
// Add and drop columns.
189144
if pErr := sc.truncateAndBackfillColumns(
190-
lease, addedColumnDescs, droppedColumnDescs, version,
145+
lease, addedColumnDescs, droppedColumnDescs,
191146
); pErr != nil {
192147
return pErr
193148
}
194149

195150
// Drop indexes.
196-
if pErr := sc.truncateIndexes(lease, droppedIndexDescs, version); pErr != nil {
151+
if pErr := sc.truncateIndexes(lease, droppedIndexDescs); pErr != nil {
197152
return pErr
198153
}
199154

200155
// Add new indexes.
201-
if pErr := sc.backfillIndexes(lease, addedIndexDescs, version); pErr != nil {
156+
if pErr := sc.backfillIndexes(lease, addedIndexDescs); pErr != nil {
202157
return pErr
203158
}
204159

205160
return nil
206161
}
207162

208163
// getTableSpan returns a span containing the start and end key for a table.
209-
// It also checks that the version hasn't changed.
210-
func (sc *SchemaChanger) getTableSpan(version DescriptorVersion) (span, *roachpb.Error) {
164+
func (sc *SchemaChanger) getTableSpan() (span, *roachpb.Error) {
211165
var tableDesc *TableDescriptor
212166
if pErr := sc.db.Txn(func(txn *client.Txn) *roachpb.Error {
213167
var pErr *roachpb.Error
214-
tableDesc, pErr = getTableDescAtVersion(txn, sc.tableID, version)
168+
tableDesc, pErr = getTableDescFromID(txn, sc.tableID)
215169
return pErr
216170
}); pErr != nil {
217171
return span{}, pErr
@@ -235,7 +189,6 @@ func (sc *SchemaChanger) truncateAndBackfillColumns(
235189
lease *TableDescriptor_SchemaChangeLease,
236190
added []ColumnDescriptor,
237191
dropped []ColumnDescriptor,
238-
version DescriptorVersion,
239192
) *roachpb.Error {
240193
evalCtx := parser.EvalContext{}
241194
// Set the eval context timestamps.
@@ -266,7 +219,7 @@ func (sc *SchemaChanger) truncateAndBackfillColumns(
266219
*lease = l
267220

268221
// Initialize start and end to represent a span of keys.
269-
sp, err := sc.getTableSpan(version)
222+
sp, err := sc.getTableSpan()
270223
if err != nil {
271224
return err
272225
}
@@ -280,12 +233,17 @@ func (sc *SchemaChanger) truncateAndBackfillColumns(
280233
}
281234
*lease = l
282235

283-
var currSentinel roachpb.Key
236+
var curSentinel roachpb.Key
284237
if pErr := sc.db.Txn(func(txn *client.Txn) *roachpb.Error {
285-
tableDesc, pErr := getTableDescAtVersion(txn, sc.tableID, version)
238+
tableDesc, pErr := getTableDescFromID(txn, sc.tableID)
286239
if pErr != nil {
287240
return pErr
288241
}
242+
// Short circuit the backfill if the table has been deleted.
243+
if tableDesc.Deleted {
244+
done = true
245+
return nil
246+
}
289247

290248
// Run a scan across the table using the primary key. Running
291249
// the scan and applying the changes in many transactions is
@@ -318,7 +276,7 @@ func (sc *SchemaChanger) truncateAndBackfillColumns(
318276
sentinelKey = stripColumnIDLength(kv.Key)
319277
// Store away key for the next table row as the
320278
// point from which to start from.
321-
currSentinel = sentinelKey
279+
curSentinel = sentinelKey
322280

323281
// Delete the entire dropped columns. This used to
324282
// use SQL UPDATE in the past to update the
@@ -379,7 +337,7 @@ func (sc *SchemaChanger) truncateAndBackfillColumns(
379337
return pErr
380338
}
381339
// Store away next starting point.
382-
sp.start = currSentinel.PrefixEnd()
340+
sp.start = curSentinel.PrefixEnd()
383341
}
384342
}
385343
return nil
@@ -388,7 +346,6 @@ func (sc *SchemaChanger) truncateAndBackfillColumns(
388346
func (sc *SchemaChanger) truncateIndexes(
389347
lease *TableDescriptor_SchemaChangeLease,
390348
dropped []IndexDescriptor,
391-
version DescriptorVersion,
392349
) *roachpb.Error {
393350
for _, desc := range dropped {
394351
// First extend the schema change lease.
@@ -398,10 +355,14 @@ func (sc *SchemaChanger) truncateIndexes(
398355
}
399356
*lease = l
400357
if pErr := sc.db.Txn(func(txn *client.Txn) *roachpb.Error {
401-
tableDesc, pErr := getTableDescAtVersion(txn, sc.tableID, version)
358+
tableDesc, pErr := getTableDescFromID(txn, sc.tableID)
402359
if pErr != nil {
403360
return pErr
404361
}
362+
// Short circuit the truncation if the table has been deleted.
363+
if tableDesc.Deleted {
364+
return nil
365+
}
405366

406367
indexPrefix := MakeIndexKeyPrefix(tableDesc.ID, desc.ID)
407368

@@ -436,7 +397,6 @@ const IndexBackfillChunkSize = 100
436397
func (sc *SchemaChanger) backfillIndexes(
437398
lease *TableDescriptor_SchemaChangeLease,
438399
added []IndexDescriptor,
439-
version DescriptorVersion,
440400
) *roachpb.Error {
441401
if len(added) == 0 {
442402
return nil
@@ -449,7 +409,7 @@ func (sc *SchemaChanger) backfillIndexes(
449409
*lease = l
450410

451411
// Initialize start and end to represent a span of keys.
452-
sp, err := sc.getTableSpan(version)
412+
sp, err := sc.getTableSpan()
453413
if err != nil {
454414
return err
455415
}
@@ -464,10 +424,15 @@ func (sc *SchemaChanger) backfillIndexes(
464424
*lease = l
465425
var nextKey roachpb.Key
466426
pErr = sc.db.Txn(func(txn *client.Txn) *roachpb.Error {
467-
tableDesc, pErr := getTableDescAtVersion(txn, sc.tableID, version)
427+
tableDesc, pErr := getTableDescFromID(txn, sc.tableID)
468428
if pErr != nil {
469429
return pErr
470430
}
431+
// Short circuit the backfill if the table has been deleted.
432+
if tableDesc.Deleted {
433+
done = true
434+
return nil
435+
}
471436

472437
// Get the next set of rows.
473438
// TODO(tamird): Support partial indexes?

sql/schema_changer.go

Lines changed: 4 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -78,9 +78,6 @@ func (sc *SchemaChanger) AcquireLease() (TableDescriptor_SchemaChangeLease, *roa
7878
if err != nil {
7979
return err
8080
}
81-
if tableDesc == nil {
82-
return roachpb.NewError(&roachpb.DescriptorDeletedError{})
83-
}
8481

8582
// A second to deal with the time uncertainty across nodes.
8683
// It is perfectly valid for two or more goroutines to hold a valid
@@ -109,9 +106,6 @@ func (sc *SchemaChanger) findTableWithLease(
109106
if err != nil {
110107
return nil, err
111108
}
112-
if tableDesc == nil {
113-
return nil, roachpb.NewError(&roachpb.DescriptorDeletedError{})
114-
}
115109
if tableDesc.Lease == nil {
116110
return nil, roachpb.NewErrorf("no lease present for tableID: %d", sc.tableID)
117111
}
@@ -185,13 +179,15 @@ func (sc SchemaChanger) exec(startBackfillNotification func()) *roachpb.Error {
185179
// Wait for everybody to see the version with the deleted bit set. When
186180
// this returns, nobody has any leases on the table, nor can get new leases,
187181
// so the table will no longer be modified.
182+
188183
lease, pErr = sc.ExtendLease(lease)
189184
if pErr != nil {
190185
return pErr
191186
}
192187
if err := sc.waitToUpdateLeases(); err != nil {
193188
return roachpb.NewError(err)
194189
}
190+
195191
// Truncate the table and delete the descriptor.
196192
if pErr := sc.truncateAndDropTable(&lease, desc.GetTable()); pErr != nil {
197193
return pErr
@@ -406,11 +402,6 @@ func (sc *SchemaChanger) IsDone() (bool, error) {
406402
if pErr != nil {
407403
return pErr
408404
}
409-
if tableDesc == nil {
410-
// The table has been deleted => no more schema changes to be done for this table.
411-
done = true
412-
return nil
413-
}
414405
if sc.mutationID == invalidMutationID {
415406
if tableDesc.UpVersion {
416407
done = false
@@ -425,7 +416,7 @@ func (sc *SchemaChanger) IsDone() (bool, error) {
425416
}
426417
return nil
427418
})
428-
return done && pErr == nil, pErr.GoError()
419+
return done, pErr.GoError()
429420
}
430421

431422
// SchemaChangeManager processes pending schema changes seen in gossip
@@ -596,7 +587,7 @@ func (s *SchemaChangeManager) Start(stopper *stop.Stopper) {
596587
if pErr != nil {
597588
switch pErr.GetDetail().(type) {
598589
case *roachpb.ExistingSchemaChangeLeaseError:
599-
case *roachpb.DescriptorDeletedError:
590+
case *roachpb.DescriptorNotFoundError:
600591
// Someone deleted this table. Don't try to run the schema
601592
// changer again. Note that there's no gossip update for the
602593
// deletion which would remove this schemaChanger.

sql/schema_changer_test.go

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -667,4 +667,45 @@ CREATE UNIQUE INDEX vidx ON t.test (v);
667667
if eCount != count {
668668
t.Fatalf("read the wrong number of rows: e = %d, v = %d", eCount, count)
669669
}
670+
671+
// Verify that a table delete in the middle of a backfill works properly.
672+
// The backfill will terminate in the middle, and the delete will
673+
// successfully delete all the table data.
674+
//
675+
// This test could be made its own test but is placed here to speed up the
676+
// testing.
677+
678+
backfillNotification = make(chan bool)
679+
// Run the schema change in a separate goroutine.
680+
var wg sync.WaitGroup
681+
wg.Add(1)
682+
go func() {
683+
// Start schema change that eventually runs a backfill.
684+
if _, err := sqlDB.Exec("CREATE UNIQUE INDEX bar ON t.test (v)"); err != nil {
685+
t.Error(err)
686+
}
687+
wg.Done()
688+
}()
689+
690+
// Wait until the schema change backfill starts.
691+
<-backfillNotification
692+
693+
// Wait for a short bit to ensure that the backfill has likely progressed
694+
// and written some data, but not long enough that the backfill has
695+
// completed.
696+
time.Sleep(10 * time.Millisecond)
697+
698+
if _, err := sqlDB.Exec("DROP TABLE t.test"); err != nil {
699+
t.Fatal(err)
700+
}
701+
702+
// Wait until the schema change is done.
703+
wg.Wait()
704+
705+
// Ensure that the table data has been deleted.
706+
if kvs, err := kvDB.Scan(tablePrefix, tableEnd, 0); err != nil {
707+
t.Fatal(err)
708+
} else if len(kvs) != 0 {
709+
t.Fatalf("expected %d key value pairs, but got %d", 0, len(kvs))
710+
}
670711
}

sql/table.go

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -254,18 +254,20 @@ func (p *planner) getTableDesc(qname *parser.QualifiedName) (*TableDescriptor, *
254254
}
255255

256256
// get the table descriptor for the ID passed in using an existing txn.
257-
// returns nil if the descriptor doesn't exist or if it exists but is not a
258-
// table.
257+
// returns an error if the descriptor doesn't exist or if it exists and is not
258+
// a table.
259259
func getTableDescFromID(txn *client.Txn, id ID) (*TableDescriptor, *roachpb.Error) {
260260
desc := &Descriptor{}
261261
descKey := MakeDescMetadataKey(id)
262262

263263
if pErr := txn.GetProto(descKey, desc); pErr != nil {
264264
return nil, pErr
265265
}
266-
// TODO(andrei): We're theoretically hiding the error where desc exists, but
267-
// is not a TableDescriptor.
268-
return desc.GetTable(), nil
266+
table := desc.GetTable()
267+
if table == nil {
268+
return nil, roachpb.NewError(&roachpb.DescriptorNotFoundError{DescriptorId: uint32(id)})
269+
}
270+
return table, nil
269271
}
270272

271273
func getKeysForTableDescriptor(

0 commit comments

Comments
 (0)