Skip to content

Commit 481adce

Browse files
authored
Merge pull request #18449 from github/tausbn/misc-add-script-for-calculating-mrva-totals
Misc: Add script for calculating totals for a MRVA run
2 parents e054948 + 8808f0f commit 481adce

File tree

1 file changed

+131
-0
lines changed

1 file changed

+131
-0
lines changed

misc/scripts/calculate_mrva_totals.py

+131
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,131 @@
1+
import argparse
import os
import shutil
import subprocess
import sys
import tempfile
from collections import defaultdict
6+
7+
help_text = """
8+
To use this script, pass the URL of a GitHub Gist as an argument. The Gist should contain the
9+
exported MarkDown output of a MRVA run.
10+
11+
The script expects the query to produce an output table of the form
12+
```
13+
| header0 | header1 | header2 | header3 | ...
14+
|----------|----------|----------|----------|----
15+
| message1 | value11 | value12 | value13 | ...
16+
| message2 | value21 | value22 | value23 | ...
17+
...
18+
```
19+
The script will calculate the totals for each message and header, and put a table containing these
20+
totals in the `_summary.md` file in the Gist. By default it will then commit and push these changes
21+
to the Gist (having first displayed a diff of the changes).
22+
"""
23+
24+
# Title of the first column of the most recently parsed table.
# Assigned by parse_markdown_table() and read by append_totals_to_summary()
# when it builds the header row of the totals table.
first_header = ""
25+
26+
def split_line(line):
    """Split one Markdown table row into its whitespace-trimmed cells.

    Leading and trailing ``|`` characters are removed from *line* first, then
    the remainder is split on ``|`` and each cell is stripped.
    """
    cells = line.strip('|').split('|')
    return list(map(str.strip, cells))
28+
29+
def parse_markdown_table(stream):
    """Parse the first Markdown table found in *stream*.

    Args:
        stream: An iterable of text lines (for example an open file).

    Returns:
        A dict mapping the first cell of every data row (the "message") to a
        dict of ``{header: value}`` for the remaining cells. Cells made up
        only of decimal digits are converted to ``int``; everything else is
        kept as a string. Backticks around cell contents are removed.
        An empty dict is returned when the stream contains no table.

    Side effects:
        Stores the title of the table's first column in the module-level
        ``first_header``. It is left untouched when no table is found.
    """
    global first_header
    iterator = (line.strip() for line in stream)

    # Skip irrelevant lines until we find the header line
    for line in iterator:
        if line.startswith('|'):
            first_header, *headers = split_line(line)
            break
    else:
        # The stream ended without any table row: nothing to parse.
        return {}

    # Skip the separator line (tolerating a table that ends at its header)
    next(iterator, None)

    data_dict = {}

    # Process the remaining lines
    for line in iterator:
        if not line.startswith('|'):
            continue
        message, *values = [value.strip('`') for value in split_line(line)]
        # zip() pairs cells with headers and ignores surplus cells instead of
        # failing on a malformed row. isdecimal() matches exactly the
        # characters int() accepts (isdigit() also matches e.g. superscripts).
        data_dict[message] = {
            header: int(value) if value.isdecimal() else value
            for header, value in zip(headers, values)
        }

    return data_dict
54+
55+
def clone_gist(gist_url, repo_dir):
    """Clone the Gist at *gist_url* into *repo_dir* using the ``gh`` CLI.

    Args:
        gist_url: URL (or ID) of the Gist, passed through to ``gh gist clone``.
        repo_dir: Directory to clone into.

    Raises:
        SystemExit: With exit status 1 when the clone fails or ``gh`` cannot
            be started. *repo_dir* is removed before exiting.
    """
    try:
        subprocess.run(["gh", "gist", "clone", gist_url, repo_dir], check=True)
    except (subprocess.CalledProcessError, OSError):
        # OSError covers a `gh` executable that is missing or not runnable,
        # which subprocess reports by raising instead of via an exit status.
        print(f"Failed to clone the gist from {gist_url}")
        # Portable replacement for `rm -rf`; a missing directory is not an error.
        shutil.rmtree(repo_dir, ignore_errors=True)
        sys.exit(1)
62+
63+
def process_gist_files(repo_dir):
    """Sum the integer table cells of every result file in *repo_dir*.

    Every ``*.md`` file except ``_summary.md`` is parsed with
    ``parse_markdown_table``; integer values are accumulated per message and
    per header, and non-integer cells are ignored.

    Args:
        repo_dir: Directory containing the cloned Gist.

    Returns:
        A nested ``defaultdict`` of the form ``totals[message][header] -> int``.
    """
    total_data = defaultdict(lambda: defaultdict(int))

    # os.listdir() returns entries in arbitrary order; sorting keeps the row
    # order of the resulting totals reproducible between runs and machines.
    for filename in sorted(os.listdir(repo_dir)):
        if not filename.endswith(".md") or filename == "_summary.md":
            continue

        # Read as UTF-8 explicitly rather than relying on the locale default.
        with open(os.path.join(repo_dir, filename), "r", encoding="utf-8") as file:
            data_dict = parse_markdown_table(file)

        for message, values in data_dict.items():
            for header, value in values.items():
                if isinstance(value, int):
                    total_data[message][header] += value

    return total_data
77+
78+
def append_totals_to_summary(total_data, repo_dir):
    """Insert a "Totals" table into ``_summary.md`` inside *repo_dir*.

    The table has one row per message and one right-aligned column per header;
    its first column is titled with the module-level ``first_header``. It is
    placed directly before the first ``### Summary`` heading, or appended to
    the end of the file when that heading is absent.

    Args:
        total_data: Mapping ``message -> {header: int}``.
        repo_dir: Directory containing ``_summary.md``.

    When *total_data* is empty the file is left untouched.
    """
    if not total_data:
        # Nothing to report; the file is not modified.
        return

    summary_path = os.path.join(repo_dir, "_summary.md")
    with open(summary_path, "r", encoding="utf-8") as summary_file:
        content = summary_file.read()

    # Union of all headers in first-seen order, so a column that only appears
    # for a later message is not dropped.
    columns = list(dict.fromkeys(
        header for values in total_data.values() for header in values
    ))
    headers = [first_header] + columns

    table_lines = [
        "| " + " | ".join(headers) + " |",
        # Right align all but the first column
        "| " + "|".join(["---"] + ["---:"] * len(columns)) + " |",
    ]
    for message, values in total_data.items():
        # A message without a value for some column contributes 0.
        row = [message] + [f"{values.get(header, 0):,}" for header in columns]
        table_lines.append("| " + " | ".join(row) + " |")
    totals_table = "\n\n### Totals\n\n" + "\n".join(table_lines) + "\n"

    marker = "### Summary"
    if marker in content:
        # Only the first heading is touched, so the table is inserted once.
        new_content = content.replace(marker, totals_table + "\n" + marker, 1)
    else:
        # Without the heading the totals would be lost; append them instead.
        new_content = content + totals_table

    with open(summary_path, "w", encoding="utf-8") as summary_file:
        summary_file.write(new_content)
96+
97+
def commit_and_push_changes(repo_dir):
    """Stage ``_summary.md``, commit it and push, running git in *repo_dir*.

    Each git command is run with ``check=True``, so the first one that exits
    with a non-zero status raises ``subprocess.CalledProcessError`` and the
    remaining commands are not run.
    """
    git_steps = (
        ["git", "add", "_summary.md"],
        ["git", "commit", "-m", "Update summary with totals"],
        ["git", "push"],
    )
    for command in git_steps:
        subprocess.run(command, cwd=repo_dir, check=True)
101+
102+
def show_git_diff(repo_dir):
    """Run ``git diff _summary.md`` in *repo_dir*.

    Raises ``subprocess.CalledProcessError`` if git exits with a non-zero
    status.
    """
    diff_command = ["git", "diff", "_summary.md"]
    subprocess.run(diff_command, check=True, cwd=repo_dir)
104+
105+
if __name__ == "__main__":
    # Command-line entry point: clone the Gist, compute the totals, show the
    # resulting diff and, after confirmation, push the change.
    parser = argparse.ArgumentParser(
        description="Calculate MRVA totals from a GitHub Gist",
        epilog=help_text,
        formatter_class=argparse.RawTextHelpFormatter,
    )
    parser.add_argument("gist_url", nargs='?', help="URL of the GitHub Gist")
    parser.add_argument("--keep-dir", action="store_true", help="Keep the temporary directory")

    args = parser.parse_args()

    # The positional is optional so that a bare invocation prints the full
    # help (including the epilog) instead of argparse's short usage error.
    if not args.gist_url:
        parser.print_help()
        sys.exit(1)

    repo_dir = tempfile.mkdtemp(dir=".")
    try:
        clone_gist(args.gist_url, repo_dir)

        total_data = process_gist_files(repo_dir)
        if not total_data:
            print("No totals could be calculated from the files in the gist.")
            sys.exit(1)

        append_totals_to_summary(total_data, repo_dir)

        show_git_diff(repo_dir)

        if input("Do you want to push the changes to the gist? (Y/n): ").strip().lower() in ['y', '']:
            commit_and_push_changes(repo_dir)
    finally:
        # Runs on success, on errors, on sys.exit() and on Ctrl-C, so the
        # temporary clone is never left behind unintentionally.
        if args.keep_dir and os.path.isdir(repo_dir):
            print(f"Temporary directory retained at: {repo_dir}")
        else:
            shutil.rmtree(repo_dir, ignore_errors=True)

0 commit comments

Comments
 (0)