@@ -368,7 +368,6 @@ bool filesort(THD *thd, Filesort *filesort, bool sort_positions,
368368 IO_CACHE chunk_file; // For saving Merge_chunk structs.
369369 IO_CACHE *outfile; // Contains the final, sorted result.
370370 Sort_param param;
371- bool multi_byte_charset;
372371 Bounded_queue<uchar *, uchar *, Sort_param, Mem_compare_queue_key>
373372 pq ((Malloc_allocator<uchar*>
374373 (key_memory_Filesort_info_record_pointers)));
@@ -414,8 +413,7 @@ bool filesort(THD *thd, Filesort *filesort, bool sort_positions,
414413
415414 param.init_for_filesort (filesort,
416415 make_array (filesort->sortorder , s_length),
417- sortlength (thd, filesort->sortorder , s_length,
418- &multi_byte_charset),
416+ sortlength (thd, filesort->sortorder , s_length),
419417 table,
420418 thd->variables .max_length_for_sort_data ,
421419 max_rows, sort_positions);
@@ -430,8 +428,7 @@ bool filesort(THD *thd, Filesort *filesort, bool sort_positions,
430428 // If number of rows is not known, use as much of sort buffer as possible.
431429 num_rows_estimate= table->file ->estimate_rows_upper_bound ();
432430
433- if (multi_byte_charset &&
434- !(param.tmp_buffer = (char *)
431+ if (!(param.tmp_buffer = (char *)
435432 my_malloc (key_memory_Sort_param_tmp_buffer,
436433 param.max_compare_length (), MYF (MY_WME))))
437434 goto err;
@@ -1508,7 +1505,6 @@ uint Sort_param::make_sortkey(uchar *to, const uchar *ref_pos)
15081505 }
15091506
15101507 const CHARSET_INFO *cs=item->collation .collation ;
1511- char fill_char= ((cs->state & MY_CS_BINSORT) ? (char ) 0 : ' ' );
15121508
15131509 /* All item->str() to use some extra byte for end null.. */
15141510 String tmp ((char *) to,sort_field->length +4 ,cs);
@@ -1539,46 +1535,29 @@ uint Sort_param::make_sortkey(uchar *to, const uchar *ref_pos)
15391535 break ;
15401536 }
15411537 uint length= static_cast <uint>(res->length ());
1542- if (sort_field->need_strnxfrm )
1538+ const char *from= res->ptr ();
1539+ if (pointer_cast<const uchar *>(from) == to)
15431540 {
1544- char *from=(char *) res->ptr ();
1545- size_t tmp_length MY_ATTRIBUTE ((unused));
1546- if ((uchar*) from == to)
1547- {
1548- DBUG_ASSERT (sort_field->length >= length);
1549- set_if_smaller (length,sort_field->length );
1550- memcpy (tmp_buffer, from, length);
1551- from= tmp_buffer;
1552- }
1553- tmp_length=
1554- cs->coll ->strnxfrm (cs, to, sort_field->length ,
1555- item->max_char_length (),
1556- (uchar*) from, length,
1557- MY_STRXFRM_PAD_TO_MAXLEN);
1558- DBUG_ASSERT (tmp_length == sort_field->length );
1541+ DBUG_ASSERT (sort_field->length >= length);
1542+ set_if_smaller (length,sort_field->length );
1543+ memcpy (tmp_buffer, from, length);
1544+ from= tmp_buffer;
15591545 }
1560- else
1546+ uint sort_field_length= sort_field->length ;
1547+ if (sort_field->suffix_length )
15611548 {
1562- size_t diff;
1563- uint sort_field_length= sort_field->length -
1564- sort_field->suffix_length ;
1565- if (sort_field_length < length)
1566- {
1567- diff= 0 ;
1568- length= sort_field_length;
1569- }
1570- else
1571- diff= sort_field_length - length;
1572- if (sort_field->suffix_length )
1573- {
1574- /* Store length last in result_string */
1575- store_length (to + sort_field_length, length,
1576- sort_field->suffix_length );
1577- }
1578-
1579- my_strnxfrm (cs, to,length,(const uchar*)res->ptr (),length);
1580- cs->cset ->fill (cs, (char *)to+length,diff,fill_char);
1549+ /* Store length last in result_string */
1550+ sort_field_length-= sort_field->suffix_length ;
1551+ store_length (to + sort_field_length, length, sort_field->suffix_length );
15811552 }
1553+
1554+ size_t tmp_length MY_ATTRIBUTE ((unused));
1555+ tmp_length=
1556+ cs->coll ->strnxfrm (cs, to, sort_field_length,
1557+ item->max_char_length (),
1558+ pointer_cast<const uchar*>(from), length,
1559+ MY_STRXFRM_PAD_TO_MAXLEN);
1560+ DBUG_ASSERT (tmp_length == sort_field_length);
15821561 break ;
15831562 }
15841563 case INT_RESULT:
@@ -2394,32 +2373,25 @@ static uint suffix_length(ulong string_length)
23942373 @param thd Thread handler
23952374 @param sortorder Order of items to sort
23962375 @param s_length Number of items to sort
2397- @param[out] multi_byte_charset Set to 1 if we are using multi-byte charset
2398- (In which case we have to use strnxfrm())
23992376
24002377 @note
24012378 sortorder->length is updated for each sort item.
2402- @n
2403- sortorder->need_strnxfrm is set 1 if we have to use strnxfrm
24042379
24052380 @return
24062381 Total length of sort buffer in bytes
24072382*/
24082383
24092384uint
2410- sortlength (THD *thd, st_sort_field *sortorder, uint s_length,
2411- bool *multi_byte_charset)
2385+ sortlength (THD *thd, st_sort_field *sortorder, uint s_length)
24122386{
24132387 uint total_length= 0 ;
2414- *multi_byte_charset= false ;
24152388
24162389 // Heed the contract that strnxfrm() needs an even number of bytes.
24172390 const uint max_sort_length_even=
24182391 (thd->variables .max_sort_length + 1 ) & ~1 ;
24192392
24202393 for (; s_length-- ; sortorder++)
24212394 {
2422- DBUG_ASSERT (!sortorder->need_strnxfrm );
24232395 DBUG_ASSERT (sortorder->suffix_length == 0 );
24242396 if (sortorder->field )
24252397 {
@@ -2428,16 +2400,12 @@ sortlength(THD *thd, st_sort_field *sortorder, uint s_length,
24282400 sortorder->length = field->sort_length ();
24292401 sortorder->is_varlen = field->sort_key_is_varlen ();
24302402
2431- if (use_strnxfrm (cs))
2432- {
2433- // How many bytes do we need (including sort weights) for strnxfrm()?
2434- sortorder->length = cs->coll ->strnxfrmlen (cs, sortorder->length );
2435- sortorder->need_strnxfrm = true ;
2436- *multi_byte_charset= 1 ;
2437- }
2403+ // How many bytes do we need (including sort weights) for strnxfrm()?
2404+ sortorder->length = cs->coll ->strnxfrmlen (cs, sortorder->length );
2405+
24382406 /*
24392407 NOTE: The corresponding test below also has a check for
2440- cs == &my_charset_bin to sort truncated blobs deterministically;
2408+ NO PAD collations to sort truncated blobs deterministically;
24412409 however, that part is dealt with in Field_blob/Field_varstring,
24422410 so we don't need it here.
24432411 */
@@ -2468,16 +2436,20 @@ sortlength(THD *thd, st_sort_field *sortorder, uint s_length,
24682436 const CHARSET_INFO *cs= item->collation .collation ;
24692437 sortorder->length = item->max_length ;
24702438 set_if_smaller (sortorder->length , max_sort_length_even);
2471- if (use_strnxfrm (cs))
2472- {
2473- // How many bytes do we need (including sort weights) for strnxfrm()?
2474- sortorder->length = cs->coll ->strnxfrmlen (cs, sortorder->length );
2475- sortorder->need_strnxfrm = true ;
2476- *multi_byte_charset= 1 ;
2477- }
2478- else if (cs->pad_attribute == NO_PAD)
2439+
2440+ // How many bytes do we need (including sort weights) for strnxfrm()?
2441+ sortorder->length = cs->coll ->strnxfrmlen (cs, sortorder->length );
2442+
2443+ if (cs->pad_attribute == NO_PAD)
24792444 {
2480- /* Store length last to be able to sort blob/varbinary */
2445+ /*
2446+ Store length last, which makes it into a tie-breaker. This is
2447+ so that e.g. 'a' < 'a\0' for the binary collation, even though
2448+ the field is fixed-width and pads with '\0'. The utf8mb4_0900_*
2449+ collations technically don't need this, since they pad with 0
2450+ (which does not match any real weight), but we'd like not to
2451+ rely on such implementation details in filesort.
2452+ */
24812453 sortorder->suffix_length = suffix_length (sortorder->length );
24822454 sortorder->length += sortorder->suffix_length ;
24832455 }
0 commit comments