Skip to content

Commit d39ecbd

Browse files
derrickstoleedscho
authored andcommitted
survey: summarize total sizes by object type
Now that we have explored objects by count, we can expand on that to summarize the on-disk and inflated sizes of those objects. This information is helpful for diagnosing both why disk space (and perhaps clone or fetch times) is growing and why certain operations are slow: the inflated size of the abstract objects that must be processed may be very large. Signed-off-by: Derrick Stolee <[email protected]>
1 parent f54e198 commit d39ecbd

File tree

2 files changed

+161
-0
lines changed

2 files changed

+161
-0
lines changed

builtin/survey.c

+132
Original file line numberDiff line numberDiff line change
@@ -60,15 +60,36 @@ struct survey_report_object_summary {
6060
size_t blobs_nr;
6161
};
6262

63+
/**
 * Size totals for one category of objects, identified by 'label'.
 *
 * Tracks how many objects fell into the category, how many bytes they
 * occupy on disk, and how many bytes they occupy once inflated (the
 * size after decompressing, both with delta bases and zlib).
 */
struct survey_report_object_size_summary {
	/* Name printed in the first column of the report table. */
	char *label;

	/* Number of objects whose information was looked up successfully. */
	size_t nr;

	/* Sum of the on-disk sizes of the counted objects. */
	size_t disk_size;

	/* Sum of the inflated sizes of the counted objects. */
	size_t inflated_size;

	/* Number of objects whose information lookup failed. */
	size_t num_missing;
};
75+
6376
/**
6477
* This struct contains all of the information that needs to be printed
6578
* at the end of the exploration of the repository and its references.
6679
*/
6780
struct survey_report {
6881
struct survey_report_ref_summary refs;
6982
struct survey_report_object_summary reachable_objects;
83+
84+
struct survey_report_object_size_summary *by_type;
7085
};
7186

87+
/*
 * Indices into the 'by_type' array of 'struct survey_report', one per
 * object type. REPORT_TYPE_COUNT is the number of entries in that array
 * and must stay last.
 */
enum {
	REPORT_TYPE_COMMIT = 0,
	REPORT_TYPE_TREE = 1,
	REPORT_TYPE_BLOB = 2,
	REPORT_TYPE_TAG = 3,
	REPORT_TYPE_COUNT = 4
};
92+
7293
struct survey_context {
7394
struct repository *repo;
7495

@@ -280,12 +301,48 @@ static void survey_report_plaintext_reachable_object_summary(struct survey_conte
280301
clear_table(&table);
281302
}
282303

304+
static void survey_report_object_sizes(const char *title,
305+
const char *categories,
306+
struct survey_report_object_size_summary *summary,
307+
size_t summary_nr)
308+
{
309+
struct survey_table table = SURVEY_TABLE_INIT;
310+
table.table_name = title;
311+
312+
strvec_push(&table.header, categories);
313+
strvec_push(&table.header, _("Count"));
314+
strvec_push(&table.header, _("Disk Size"));
315+
strvec_push(&table.header, _("Inflated Size"));
316+
317+
for (size_t i = 0; i < summary_nr; i++) {
318+
char *label_str = xstrdup(summary[i].label);
319+
char *nr_str = xstrfmt("%"PRIuMAX, (uintmax_t)summary[i].nr);
320+
char *disk_str = xstrfmt("%"PRIuMAX, (uintmax_t)summary[i].disk_size);
321+
char *inflate_str = xstrfmt("%"PRIuMAX, (uintmax_t)summary[i].inflated_size);
322+
323+
insert_table_rowv(&table, label_str, nr_str,
324+
disk_str, inflate_str, NULL);
325+
326+
free(label_str);
327+
free(nr_str);
328+
free(disk_str);
329+
free(inflate_str);
330+
}
331+
332+
print_table_plaintext(&table);
333+
clear_table(&table);
334+
}
335+
283336
static void survey_report_plaintext(struct survey_context *ctx)
284337
{
285338
printf("GIT SURVEY for \"%s\"\n", ctx->repo->worktree);
286339
printf("-----------------------------------------------------\n");
287340
survey_report_plaintext_refs(ctx);
288341
survey_report_plaintext_reachable_object_summary(ctx);
342+
survey_report_object_sizes(_("TOTAL OBJECT SIZES BY TYPE"),
343+
_("Object Type"),
344+
ctx->report.by_type,
345+
REPORT_TYPE_COUNT);
289346
}
290347

291348
/*
@@ -499,6 +556,68 @@ static void increment_object_counts(
499556
}
500557
}
501558

559+
static void increment_totals(struct survey_context *ctx,
560+
struct oid_array *oids,
561+
struct survey_report_object_size_summary *summary)
562+
{
563+
for (size_t i = 0; i < oids->nr; i++) {
564+
struct object_info oi = OBJECT_INFO_INIT;
565+
unsigned oi_flags = OBJECT_INFO_FOR_PREFETCH;
566+
unsigned long object_length = 0;
567+
off_t disk_sizep = 0;
568+
enum object_type type;
569+
570+
oi.typep = &type;
571+
oi.sizep = &object_length;
572+
oi.disk_sizep = &disk_sizep;
573+
574+
if (oid_object_info_extended(ctx->repo, &oids->oid[i],
575+
&oi, oi_flags) < 0) {
576+
summary->num_missing++;
577+
} else {
578+
summary->nr++;
579+
summary->disk_size += disk_sizep;
580+
summary->inflated_size += object_length;
581+
}
582+
}
583+
}
584+
585+
static void increment_object_totals(struct survey_context *ctx,
586+
struct oid_array *oids,
587+
enum object_type type)
588+
{
589+
struct survey_report_object_size_summary *total;
590+
struct survey_report_object_size_summary summary = { 0 };
591+
592+
increment_totals(ctx, oids, &summary);
593+
594+
switch (type) {
595+
case OBJ_COMMIT:
596+
total = &ctx->report.by_type[REPORT_TYPE_COMMIT];
597+
break;
598+
599+
case OBJ_TREE:
600+
total = &ctx->report.by_type[REPORT_TYPE_TREE];
601+
break;
602+
603+
case OBJ_BLOB:
604+
total = &ctx->report.by_type[REPORT_TYPE_BLOB];
605+
break;
606+
607+
case OBJ_TAG:
608+
total = &ctx->report.by_type[REPORT_TYPE_TAG];
609+
break;
610+
611+
default:
612+
BUG("No other type allowed");
613+
}
614+
615+
total->nr += summary.nr;
616+
total->disk_size += summary.disk_size;
617+
total->inflated_size += summary.inflated_size;
618+
total->num_missing += summary.num_missing;
619+
}
620+
502621
static int survey_objects_path_walk_fn(const char *path,
503622
struct oid_array *oids,
504623
enum object_type type,
@@ -508,10 +627,20 @@ static int survey_objects_path_walk_fn(const char *path,
508627

509628
increment_object_counts(&ctx->report.reachable_objects,
510629
type, oids->nr);
630+
increment_object_totals(ctx, oids, type);
511631

512632
return 0;
513633
}
514634

635+
static void initialize_report(struct survey_context *ctx)
636+
{
637+
CALLOC_ARRAY(ctx->report.by_type, REPORT_TYPE_COUNT);
638+
ctx->report.by_type[REPORT_TYPE_COMMIT].label = xstrdup(_("Commits"));
639+
ctx->report.by_type[REPORT_TYPE_TREE].label = xstrdup(_("Trees"));
640+
ctx->report.by_type[REPORT_TYPE_BLOB].label = xstrdup(_("Blobs"));
641+
ctx->report.by_type[REPORT_TYPE_TAG].label = xstrdup(_("Tags"));
642+
}
643+
515644
static void survey_phase_objects(struct survey_context *ctx)
516645
{
517646
struct rev_info revs = REV_INFO_INIT;
@@ -524,12 +653,15 @@ static void survey_phase_objects(struct survey_context *ctx)
524653
info.path_fn = survey_objects_path_walk_fn;
525654
info.path_fn_data = ctx;
526655

656+
initialize_report(ctx);
657+
527658
repo_init_revisions(ctx->repo, &revs, "");
528659
revs.tag_objects = 1;
529660

530661
for (int i = 0; i < ctx->ref_array.nr; i++) {
531662
struct ref_array_item *item = ctx->ref_array.items[i];
532663
add_pending_oid(&revs, NULL, &item->objectname, add_flags);
664+
display_progress(ctx->progress, ++(ctx->progress_nr));
533665
}
534666

535667
walk_objects_by_path(&info);

t/t8100-git-survey.sh

+29
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,26 @@ test_expect_success 'git survey (default)' '
2929
git survey --all-refs >out 2>err &&
3030
test_line_count = 0 err &&
3131
32+
test_oid_cache <<-EOF &&
33+
commits_size_on_disk sha1: 1523
34+
commits_size_on_disk sha256: 1811
35+
36+
commits_size sha1: 2153
37+
commits_size sha256: 2609
38+
39+
trees_size_on_disk sha1: 495
40+
trees_size_on_disk sha256: 635
41+
42+
trees_size sha1: 1706
43+
trees_size sha256: 2366
44+
45+
tags_size sha1: 528
46+
tags_size sha256: 624
47+
48+
tags_size_on_disk sha1: 510
49+
tags_size_on_disk sha256: 569
50+
EOF
51+
3252
tr , " " >expect <<-EOF &&
3353
GIT SURVEY for "$(pwd)"
3454
-----------------------------------------------------
@@ -50,6 +70,15 @@ test_expect_success 'git survey (default)' '
5070
Commits | 10
5171
Trees | 10
5272
Blobs | 10
73+
74+
TOTAL OBJECT SIZES BY TYPE
75+
===============================================
76+
Object Type | Count | Disk Size | Inflated Size
77+
------------+-------+-----------+--------------
78+
Commits | 10 | $(test_oid commits_size_on_disk) | $(test_oid commits_size)
79+
Trees | 10 | $(test_oid trees_size_on_disk) | $(test_oid trees_size)
80+
Blobs | 10 | 191 | 101
81+
Tags | 4 | $(test_oid tags_size_on_disk) | $(test_oid tags_size)
5382
EOF
5483
5584
test_cmp expect out

0 commit comments

Comments
 (0)