Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 30 additions & 0 deletions dev/audit-log-review/create_app_usage_sheet.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
import csv

from lib.models import *
from collections import defaultdict

from pony.orm import db_session

@db_session
def main():
    """Export per-app endpoint usage to ``output-apps.csv``.

    For every app, the endpoint hits with a positive count are tallied by the
    level and category of the endpoint they refer to (one tally per hit row).
    The rows are written sorted by app name, then by descending tally, then by
    level and category.
    """
    rows = []
    for app in App.select(lambda x: True):
        used = EndpointHit.select(lambda hit: hit.app == app and hit.count > 0)

        # tally[level][category] -> number of used endpoints for this app
        tally = defaultdict(lambda: defaultdict(int))
        for used_hit in used:
            target = used_hit.endpoint
            tally[target.level][target.category] += 1

        for level, per_category in tally.items():
            for category, total in per_category.items():
                rows.append([app.name, level, category, total])

    # Column 3 is the tally; negate it so the busiest groups come first.
    rows.sort(key=lambda row: (row[0], -row[3], row[1], row[2]))

    with open("output-apps.csv", "wb") as f:
        writer = csv.writer(f)
        writer.writerow(["app", "level", "category", "count"])
        writer.writerows(rows)


# Script entry point: there is no ``__name__`` guard, so the export runs
# whenever this module is executed or imported.
main()
56 changes: 56 additions & 0 deletions dev/audit-log-review/create_chart_csv.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
import csv

import sys
from lib.models import *

from pony.orm import db_session, select, count

from collections import defaultdict

@db_session
def main(appname=None):
    """Write the endpoint usage data for the chart to ``output-chart.csv``.

    One row is written per used endpoint (hit rows with ``count > 0``),
    followed by one "unused" row per (level, category) for hit rows whose
    count is zero. Used endpoints are ordered so that the levels, and then
    the categories within a level, with the most hits come first.

    Args:
        appname: Optional app name. When given, only hits recorded for the
            app with that name are considered; otherwise all apps are used.
    """
    apps = App.select(lambda x: appname is None or x.name == appname).order_by(App.name)

    hits = select((hit.endpoint, count()) for hit in EndpointHit if hit.app in apps and hit.count > 0)
    misses = select((hit.endpoint.level, hit.endpoint.category, count()) for hit in EndpointHit if hit.app in apps and hit.count == 0)

    # Totals per level and per (level, category), used only as sort weights.
    order_level = defaultdict(int)
    for level, hitcount in select((hit.endpoint.level, count()) for hit in EndpointHit if hit.app in apps and hit.count > 0):
        order_level[level] = hitcount
    order_level_category = defaultdict(lambda: defaultdict(int))
    for level, category, hitcount in select((hit.endpoint.level, hit.endpoint.category, count()) for hit in EndpointHit if hit.app in apps and hit.count > 0):
        order_level_category[level][category] = hitcount

    def sort_hits_fn(row):
        # Negated weights give a descending order; url/method break ties.
        endpoint, hitcount = row
        return (
            -order_level[endpoint.level],
            -order_level_category[endpoint.level][endpoint.category],
            -hitcount,
            endpoint.url,
            endpoint.method)

    def label(category):
        # An empty category is reported under an explicit placeholder name.
        return "uncategorized" if category == '' else category

    hits = sorted(hits, key=sort_hits_fn)
    # Single-argument parenthesised form prints the same text as the
    # original print statement.
    print("%d hits" % len(hits))

    # Binary mode is what the Python 2 csv module expects for output files.
    with open('output-chart.csv', 'wb') as f:
        writer = csv.writer(f)
        writer.writerow(['level', 'category', 'method + url', 'count'])

        # NOTE: the loop variables must not be called ``count``; that would
        # make the name local to this function and shadow Pony's ``count()``
        # aggregate that the queries above rely on.
        for endpoint, hitcount in hits:
            writer.writerow([endpoint.level, label(endpoint.category), endpoint.method + " " + endpoint.url, hitcount])
        for level, category, misscount in misses:
            writer.writerow([level, label(category), "unused", misscount])

# Script entry point: an optional first command-line argument is passed to
# main() as the app name; without it main() runs with its default.
if len(sys.argv) > 1:
    main(sys.argv[1])
else:
    main()
57 changes: 57 additions & 0 deletions dev/audit-log-review/create_spreadsheet.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
import csv

from lib.models import *

from pony.orm import db_session


@db_session
def main():
    """Write the endpoint-by-app usage matrix to ``output-spreadsheet.csv``.

    Each row describes one endpoint: its level, category, method, url and
    conformance flag, the number of apps with a positive hit count for it,
    one hit-count column per app (0 when no hit row exists) and finally the
    endpoint's ``questions`` field. Rows are ordered by descending number of
    apps using the endpoint; ties keep the level/url/method query order.
    """
    # Materialise the app list once: it is walked for the header row and then
    # again for every endpoint.
    apps = list(App.select(lambda x: True).order_by(App.name))
    endpoints = Endpoint.select(lambda x: True).order_by(Endpoint.level, Endpoint.url, Endpoint.method)

    headers = ['level', 'category', 'method', 'url', 'conforms', 'apps using it']
    headers += [app.name for app in apps]
    headers += ['questions']

    # Position of the 'apps using it' column, used as the sort key below.
    idx_using = 5

    results = []
    for endpoint in endpoints:
        counts = []
        for app in apps:
            hit = EndpointHit.get(endpoint=endpoint, app=app)
            counts.append(hit.count if hit is not None else 0)

        # Count with a generator rather than len(filter(...)), which only
        # works where filter() returns a list.
        apps_using = sum(1 for used in counts if used > 0)

        row = [endpoint.level, endpoint.category, endpoint.method, endpoint.url, endpoint.conforms]
        row.append(apps_using)
        row += counts
        row.append(endpoint.questions)
        results.append(row)

    # list.sort is stable, so endpoints with equal usage keep query order.
    results.sort(key=lambda row: -row[idx_using])

    # Binary mode is what the Python 2 csv module expects for output files.
    with open("output-spreadsheet.csv", "wb") as f:
        writer = csv.writer(f)
        writer.writerow(headers)
        writer.writerows(results)


# Script entry point: there is no ``__name__`` guard, so the export runs
# whenever this module is executed or imported.
main()
38 changes: 38 additions & 0 deletions dev/audit-log-review/create_summaries.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
import csv

from lib.models import *
from collections import defaultdict

from pony.orm import db_session

@db_session
def main():
    """Write a per-app summary of endpoint usage to ``output-summary.csv``.

    For each app the used endpoints (hit rows with ``count > 0``) are counted
    per level. The "conformance" column is the least stable level in use:
    "alpha" if any alpha endpoint is used, otherwise "beta" if any beta
    endpoint is used, otherwise "stable". Rows are sorted by app name.
    """
    rows = []
    for app in App.select(lambda x: True):
        used = EndpointHit.select(lambda hit: hit.app == app and hit.count > 0)

        # tally[level][category] -> number of used endpoints for this app
        tally = defaultdict(lambda: defaultdict(int))
        for used_hit in used:
            target = used_hit.endpoint
            tally[target.level][target.category] += 1

        stablesum = sum(tally['stable'].values())
        betasum = sum(tally['beta'].values())
        alphasum = sum(tally['alpha'].values())

        if alphasum > 0:
            conformance = "alpha"
        elif betasum > 0:
            conformance = "beta"
        else:
            conformance = "stable"

        rows.append([
            app.name,
            stablesum,
            betasum,
            alphasum,
            alphasum + betasum + stablesum,
            conformance,
        ])

    rows.sort(key=lambda row: row[0])

    with open("output-summary.csv", "wb") as f:
        writer = csv.writer(f)
        writer.writerow(["app", "stable", "beta", "alpha", "total", "conformance"])
        writer.writerows(rows)


# Script entry point: there is no ``__name__`` guard, so the export runs
# whenever this module is executed or imported.
main()
12 changes: 12 additions & 0 deletions dev/audit-log-review/index.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<title>Sequences sunburst</title>

</head>
<body>
<div><a href="sunburst-apps/index.html">Apps</a></div>
<div><a href="sunburst/index.html">e2e</a></div>
<div><a href="https://docs.google.com/spreadsheets/d/1dgs6GlKeLB2GvadQxGmbRaDNSqDk6IlztZ-Npj4Mx6I/edit?usp=sharing">Spreadsheet with full results</a></div>
</body>
</html>
22 changes: 4 additions & 18 deletions dev/audit-log-review/sunburst-apps/README.md
Original file line number Diff line number Diff line change
@@ -1,22 +1,8 @@
(This is a minimal update of my original [Sequences sunburst](http://bl.ocks.org/kerryrodden/7090426) block, to work correctly with d3 v4.)
Visualization showing API server endpoints used by Kubernetes apps

This example shows how it is possible to use a [D3 sunburst visualization](http://bl.ocks.org/mbostock/4063423) (partition layout) with data that describes sequences of events.
To generate the data:

A good use case is to summarize navigation paths through a web site, as in the sample synthetic data file (visit_sequences.csv). The visualization makes it easy to understand visits that start directly on a product page (e.g. after landing there from a search engine), compared to visits where users arrive on the site's home page and navigate from there. Where a funnel lets you understand a single pre-selected path, this allows you to see all possible paths.
```python create_chart_csv.py```

Features:

* works with data that is in a CSV format (you don't need to pre-generate a hierarchical JSON file, unless your data file is very large)
* interactive breadcrumb trail helps to emphasize the sequence, so that it is easy for a first-time user to understand what they are seeing
* percentages are shown explicitly, to help overcome the distortion of the data that occurs when using a radial presentation

If you want to simply reuse this with your own data, here are some tips for generating the CSV file:

* no header is required (but it's OK if one is present)
* use a hyphen to separate the steps in the sequence
* the step names should be one word only, and ideally should be kept short. Non-alphanumeric characters will probably cause problems (I haven't tested this).
* every sequence should have an "end" marker as the last element, *unless* it has been truncated because it is longer than the maximum sequence length (6, in the example). The purpose of the "end" marker is to distinguish a true end point (e.g. the user left the site) from an end point that has been forced by truncation.
* each line should be a complete path from root to leaf - don't include counts for intermediate steps. For example, include "home-search-end" and "home-search-product-end" but not "home-search" - the latter is computed by the partition layout, by adding up the counts of all the sequences with that prefix.
* to keep the number of permutations low, use a small number of unique step names, and a small maximum sequence length. Larger numbers of either of these will lead to a very large CSV that will be slow to process (and therefore require pre-processing into hierarchical JSON).

I created this example in my work at Google, but it is not part of any Google product. It is covered by the Apache license (see the LICENSE file).
This visualisation is based on kerryrodden's sunburst graph example found [here](https://gist.github.com/kerryrodden/766f8f6d31f645c39f488a0befa1e3c8)
1 change: 0 additions & 1 deletion dev/audit-log-review/sunburst-apps/chart.html
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
</head>
<body>
<div id="main">
<div id="sequence"></div>
<div id="chart">
<div id="explanation">
<div id="reallybigline"></div>
Expand Down
13 changes: 7 additions & 6 deletions dev/audit-log-review/sunburst-apps/sequences.css
Original file line number Diff line number Diff line change
Expand Up @@ -3,28 +3,29 @@ body {
font-size: 12px;
font-weight: 400;
background-color: #fff;
width: 900px;
width: 1100px;
height: 650px;
margin-top: 10px;
}

#main {
float: left;
float: right;
width: 750px;
}

#sidebar {
float: right;
width: 100px;
float: left;
padding-left: 20px;
}

#sequence {
width: 600px;
height: 70px;
display: none;
}

#legend {
padding: 80px 0 0 50px;
padding: 0 0 0 0;
}

#sequence text, #legend text {
Expand All @@ -43,7 +44,7 @@ body {
#explanation {
position: absolute;
top: 250px;
left: 325px;
left: 200px;
width: 250px;
text-align: center;
color: #000;
Expand Down
4 changes: 2 additions & 2 deletions dev/audit-log-review/sunburst-apps/sequences.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@


// Dimensions of sunburst.
var width = 900;
var width = 650;
var height = 650;
var radius = Math.min(width, height) / 2;

Expand Down Expand Up @@ -310,7 +310,7 @@ function drawLegend() {

// Dimensions of legend item: width, height, spacing, radius of rounded rect.
var li = {
w: 150, h: 20, s: 3, r: 3
w: 150, h: 23, s: 3, r: 3
};

var legend = d3.select("#legend").append("svg:svg")
Expand Down
22 changes: 4 additions & 18 deletions dev/audit-log-review/sunburst/README.md
Original file line number Diff line number Diff line change
@@ -1,22 +1,8 @@
(This is a minimal update of my original [Sequences sunburst](http://bl.ocks.org/kerryrodden/7090426) block, to work correctly with d3 v4.)
Visualization showing API server endpoints used by Kubernetes apps

This example shows how it is possible to use a [D3 sunburst visualization](http://bl.ocks.org/mbostock/4063423) (partition layout) with data that describes sequences of events.
To generate the data:

A good use case is to summarize navigation paths through a web site, as in the sample synthetic data file (visit_sequences.csv). The visualization makes it easy to understand visits that start directly on a product page (e.g. after landing there from a search engine), compared to visits where users arrive on the site's home page and navigate from there. Where a funnel lets you understand a single pre-selected path, this allows you to see all possible paths.
```python create_chart_csv.py```

Features:

* works with data that is in a CSV format (you don't need to pre-generate a hierarchical JSON file, unless your data file is very large)
* interactive breadcrumb trail helps to emphasize the sequence, so that it is easy for a first-time user to understand what they are seeing
* percentages are shown explicitly, to help overcome the distortion of the data that occurs when using a radial presentation

If you want to simply reuse this with your own data, here are some tips for generating the CSV file:

* no header is required (but it's OK if one is present)
* use a hyphen to separate the steps in the sequence
* the step names should be one word only, and ideally should be kept short. Non-alphanumeric characters will probably cause problems (I haven't tested this).
* every sequence should have an "end" marker as the last element, *unless* it has been truncated because it is longer than the maximum sequence length (6, in the example). The purpose of the "end" marker is to distinguish a true end point (e.g. the user left the site) from an end point that has been forced by truncation.
* each line should be a complete path from root to leaf - don't include counts for intermediate steps. For example, include "home-search-end" and "home-search-product-end" but not "home-search" - the latter is computed by the partition layout, by adding up the counts of all the sequences with that prefix.
* to keep the number of permutations low, use a small number of unique step names, and a small maximum sequence length. Larger numbers of either of these will lead to a very large CSV that will be slow to process (and therefore require pre-processing into hierarchical JSON).

I created this example in my work at Google, but it is not part of any Google product. It is covered by the Apache license (see the LICENSE file).
This visualisation is based on kerryrodden's sunburst graph example found [here](https://gist.github.com/kerryrodden/766f8f6d31f645c39f488a0befa1e3c8)
1 change: 0 additions & 1 deletion dev/audit-log-review/sunburst/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@
</div>
</div>
<div id="sidebar">
<!-- <input type="checkbox" id="togglelegend"> Legend<br/> -->
<div id="legend" style="visibility: hidden;"></div>
</div>
<script type="text/javascript" src="sequences.js"></script>
Expand Down