kubernetes-sigs · hh · May 10, 2018 · May 10, 2018 · May 10, 2018 · May 10, 2018
diff --git a/dev/audit-log-review/create_app_usage_sheet.py b/dev/audit-log-review/create_app_usage_sheet.py
@@ -0,0 +1,30 @@
+import csv
+
+from lib.models import *
+from collections import defaultdict
+
+from pony.orm import db_session
+
+@db_session  # pony.orm decorator: main() runs inside a database session
+def main():  # Writes output-apps.csv: one row per (app, level, category) with a count of matching hit rows.
+    headers = ["app", "level", "category", "count"]
+    results = []
+    for app in App.select(lambda x: True):  # lambda x: True selects every App
+        hits = EndpointHit.select(lambda hit: hit.app == app and hit.count > 0)  # only rows with a non-zero count for this app
+        stats = defaultdict(lambda: defaultdict(int))  # stats[level][category] -> number of hit rows
+        for hit in hits:
+            endpoint = hit.endpoint
+            stats[endpoint.level][endpoint.category] += 1  # counts rows, not the sum of hit.count
+        for level in stats:
+            for category, count in stats[level].items():
+                results += [[app.name, level, category, count]]
+
+    results = sorted(results, key=lambda x: (x[0], -x[-1], x[1], x[2]))  # app name, then largest count first, then level, then category
+    with open("output-apps.csv", "wb") as f:  # "wb" is the csv mode for Python 2; Python 3 would need "w" with newline=""
+        writer = csv.writer(f)
+        writer.writerow(headers)
+        for result in results:
+            writer.writerow(result)
+
+
+main()  # no __main__ guard: runs whenever this file is executed or imported
diff --git a/dev/audit-log-review/create_chart_csv.py b/dev/audit-log-review/create_chart_csv.py
@@ -0,0 +1,56 @@
+import csv
+
+import sys
+from lib.models import *
+
+from pony.orm import db_session, select, count
+
+from collections import defaultdict
+
+@db_session
+def main(appname=None):
+    results = []
+    apps = App.select(lambda x: appname is None or x.name == appname).order_by(App.name)
+
+    hits = select((hit.endpoint, count()) for hit in EndpointHit if hit.app in apps and hit.count > 0)
+    misses = select((hit.endpoint.level, hit.endpoint.category, count()) for hit in EndpointHit if hit.app in apps and hit.count == 0)
+
+    # sort everything here
+    order_level = defaultdict(int)
+    for level, hitcount in select((hit.endpoint.level, count()) for hit in EndpointHit if hit.app in apps and hit.count > 0):
+        order_level[level] = hitcount
+    order_level_category = defaultdict(lambda: defaultdict(int))
+    for level, category, hitcount in select((hit.endpoint.level, hit.endpoint.category, count()) for hit in EndpointHit if hit.app in apps and hit.count > 0):
+        order_level_category[level][category] = hitcount
+
+    def sort_hits_fn(row):
+        endpoint, hitcount = row
+        return (
+            -order_level[endpoint.level],
+            -order_level_category[endpoint.level][endpoint.category],
+            -hitcount,
+            endpoint.url,
+            endpoint.method)
+
+    hits = sorted(hits, key=sort_hits_fn)
+    print len(hits), "hits"
+
+    with open('output-chart.csv', 'wb') as f:
+        writer = csv.writer(f)
+
+        headers = ['level', 'category', 'method + url', 'count']
+        writer.writerow(headers)
+        for endpoint, count in hits:
+            category = endpoint.category
+            if category == '':
+                category = "uncategorized"
+            writer.writerow([endpoint.level, category, endpoint.method + " " + endpoint.url, count])
+        for level, category, count in misses:
+            if category == '':
+                category = "uncategorized"
+            writer.writerow([level, category, "unused", count]) 
+
+if len(sys.argv) > 1:
+    main(sys.argv[1])
+else:
+    main()
diff --git a/dev/audit-log-review/create_spreadsheet.py b/dev/audit-log-review/create_spreadsheet.py
@@ -0,0 +1,57 @@
+import csv
+
+from lib.models import *
+
+from pony.orm import db_session
+
+
+@db_session  # pony.orm decorator: main() runs inside a database session
+def main():  # Writes output-spreadsheet.csv: one row per endpoint with a per-app count column, most widely used first.
+    results = []
+    apps = App.select(lambda x: True).order_by(App.name)  # every app, by name; this order defines the per-app columns
+    num_apps = len(apps)  # NOTE(review): not used anywhere else in this function
+    endpoints = Endpoint.select(lambda x: True).order_by(Endpoint.level, Endpoint.url, Endpoint.method)
+    headers = ['level', 'category', 'method', 'url', 'conforms', 'apps using it']
+
+    for app in apps:
+        headers += [app.name]
+    # for app in apps:
+    #     headers += [app.name + ' count']
+
+    headers += ['questions']
+
+    for endpoint in endpoints:
+        result = [endpoint.level, endpoint.category, endpoint.method, endpoint.url, endpoint.conforms]
+        counts = []
+        hits = []  # only referenced by the commented-out lines below
+        for app in apps:
+            hit = EndpointHit.get(endpoint=endpoint, app=app)  # one lookup per (endpoint, app) pair
+            if hit is not None:
+                count = hit.count
+            else:
+                count = 0  # no EndpointHit row is reported as zero
+            # hits.append(('', 'x')[count > 0])
+            counts.append(count)
+        # result += hits
+        apps_using = len(filter(lambda x: x > 0, counts))  # apps with a non-zero count; len(filter(...)) needs filter to return a list (Python 2)
+        result += [apps_using]
+        result += counts
+        result += [endpoint.questions]
+        results += [result]
+
+    idx_using = 5  # position of 'apps using it' in each row, matching headers
+    idx_method = 2  # unused
+    idx_url = 3  # unused
+    idx_level = 0  # unused
+    idx_category = 1  # unused
+
+    results = sorted(results, key=lambda x: (-x[idx_using]))  # sorted() is stable, so ties keep the level/url/method query order
+
+    with open("output-spreadsheet.csv", "wb") as f:  # "wb" is the csv mode for Python 2
+        writer = csv.writer(f)
+        writer.writerow(headers)
+        for result in results:
+            writer.writerow(result)
+
+
+main()  # no __main__ guard: runs whenever this file is executed or imported
diff --git a/dev/audit-log-review/create_summaries.py b/dev/audit-log-review/create_summaries.py
@@ -0,0 +1,38 @@
+import csv
+
+from lib.models import *
+from collections import defaultdict
+
+from pony.orm import db_session
+
+@db_session  # pony.orm decorator: main() runs inside a database session
+def main():  # Writes output-summary.csv: per app, hit-row counts for stable/beta/alpha, their total, and a "conformance" level.
+    headers = ["app", "stable", "beta", "alpha", "total", "conformance"]
+    results = []
+    for app in App.select(lambda x: True):  # lambda x: True selects every App
+        hits = EndpointHit.select(lambda hit: hit.app == app and hit.count > 0)  # only rows with a non-zero count for this app
+        stats = defaultdict(lambda: defaultdict(int))  # stats[level][category] -> number of hit rows
+        for hit in hits:
+            endpoint = hit.endpoint
+            stats[endpoint.level][endpoint.category] += 1
+
+        alphasum = sum(stats['alpha'].values())  # indexing the defaultdict yields an empty dict (sum 0) for an absent level
+        betasum = sum(stats['beta'].values())
+        stablesum = sum(stats['stable'].values())
+        level = "stable"  # default, also used when the app has no hits at all
+        if betasum > 0:
+            level = "beta"
+        if alphasum > 0:  # checked last so alpha overrides beta
+            level = "alpha"
+        total = alphasum + betasum + stablesum  # levels other than these three are not included
+        results += [[app.name, stablesum, betasum, alphasum, total, level]]
+
+    results = sorted(results, key=lambda x: (x[0]))  # by app name
+    with open("output-summary.csv", "wb") as f:  # "wb" is the csv mode for Python 2
+        writer = csv.writer(f)
+        writer.writerow(headers)
+        for result in results:
+            writer.writerow(result)
+
+
+main()  # no __main__ guard: runs whenever this file is executed or imported
diff --git a/dev/audit-log-review/index.html b/dev/audit-log-review/index.html
@@ -0,0 +1,12 @@
+<!DOCTYPE html>
+<html>
+  <head>
+    <meta charset="utf-8">
+    <title>Sequences sunburst</title>
+
+  </head>
+  <body>
+      <div><a href="sunburst-apps/index.html">Apps</a></div>
+      <div><a href="sunburst/index.html">e2e</a></div>
+      <div><a href="https://docs.google.com/spreadsheets/d/1dgs6GlKeLB2GvadQxGmbRaDNSqDk6IlztZ-Npj4Mx6I/edit?usp=sharing">Spreadsheet with full results</a></div>
+</html>
diff --git a/dev/audit-log-review/sunburst-apps/README.md b/dev/audit-log-review/sunburst-apps/README.md
@@ -1,22 +1,8 @@
-(This is a minimal update of my original [Sequences sunburst](http://bl.ocks.org/kerryrodden/7090426) block, to work correctly with d3 v4.) 
+Visualization showing API server endpoints used by Kubernetes apps
 
-This example shows how it is possible to use a [D3 sunburst visualization](http://bl.ocks.org/mbostock/4063423) (partition layout) with data that describes sequences of events.
+To generate the data:
 
-A good use case is to summarize navigation paths through a web site, as in the sample synthetic data file (visit_sequences.csv). The visualization makes it easy to understand visits that start directly on a product page (e.g. after landing there from a search engine), compared to visits where users arrive on the site's home page and navigate from there. Where a funnel lets you understand a single pre-selected path, this allows you to see all possible paths.
+```python create_chart_csv.py```
 
-Features:
 
-* works with data that is in a CSV format (you don't need to pre-generate a hierarchical JSON file, unless your data file is very large) 
-* interactive breadcrumb trail helps to emphasize the sequence, so that it is easy for a first-time user to understand what they are seeing
-* percentages are shown explicitly, to help overcome the distortion of the data that occurs when using a radial presentation
-
-If you want to simply reuse this with your own data, here are some tips for generating the CSV file:
-
-* no header is required (but it's OK if one is present)
-* use a hyphen to separate the steps in the sequence
-* the step names should be one word only, and ideally should be kept short. Non-alphanumeric characters will probably cause problems (I haven't tested this).
-* every sequence should have an "end" marker as the last element, *unless* it has been truncated because it is longer than the maximum sequence length (6, in the example). The purpose of the "end" marker is to distinguish a true end point (e.g. the user left the site) from an end point that has been forced by truncation.
-* each line should be a complete path from root to leaf - don't include counts for intermediate steps. For example, include "home-search-end" and "home-search-product-end" but not "home-search" - the latter is computed by the partition layout, by adding up the counts of all the sequences with that prefix.
-* to keep the number of permutations low, use a small number of unique step names, and a small maximum sequence length. Larger numbers of either of these will lead to a very large CSV that will be slow to process (and therefore require pre-processing into hierarchical JSON).
-
-I created this example in my work at Google, but it is not part of any Google product. It is covered by the Apache license (see the LICENSE file).
+This visualization is based on kerryrodden's sunburst graph example, found [here](https://gist.github.com/kerryrodden/766f8f6d31f645c39f488a0befa1e3c8)
diff --git a/dev/audit-log-review/sunburst-apps/chart.html b/dev/audit-log-review/sunburst-apps/chart.html
@@ -10,7 +10,6 @@
   </head>
   <body>
     <div id="main">
-      <div id="sequence"></div>
       <div id="chart">
         <div id="explanation">
           <div id="reallybigline"></div>

diff --git a/dev/audit-log-review/sunburst-apps/sequences.css b/dev/audit-log-review/sunburst-apps/sequences.css
@@ -3,28 +3,29 @@ body {
   font-size: 12px;
   font-weight: 400;
   background-color: #fff;
-  width: 900px;
+  width: 1100px;
   height: 650px;
   margin-top: 10px;
 }
 
 #main {
-  float: left;
+  float: right;
   width: 750px;
 }
 
 #sidebar {
-  float: right;
-  width: 100px;
+  float: left;
+  padding-left: 20px;
 }
 
 #sequence {
   width: 600px;
   height: 70px;
+  display: none;
 }
 
 #legend {
-  padding: 80px 0 0 50px;
+  padding: 0 0 0 0;
 }
 
 #sequence text, #legend text {
@@ -43,7 +44,7 @@ body {
 #explanation {
   position: absolute;
   top: 250px;
-  left: 325px;
+  left: 200px;
   width: 250px;
   text-align: center;
   color: #000;

diff --git a/dev/audit-log-review/sunburst-apps/sequences.js b/dev/audit-log-review/sunburst-apps/sequences.js
@@ -4,7 +4,7 @@
 
 
 // Dimensions of sunburst.
-var width = 900;
+var width = 650;
 var height = 650;
 var radius = Math.min(width, height) / 2;
 
@@ -310,7 +310,7 @@ function drawLegend() {
 
   // Dimensions of legend item: width, height, spacing, radius of rounded rect.
   var li = {
-    w: 150, h: 20, s: 3, r: 3
+    w: 150, h: 23, s: 3, r: 3
   };
 
   var legend = d3.select("#legend").append("svg:svg")

diff --git a/dev/audit-log-review/sunburst/README.md b/dev/audit-log-review/sunburst/README.md
@@ -1,22 +1,8 @@
-(This is a minimal update of my original [Sequences sunburst](http://bl.ocks.org/kerryrodden/7090426) block, to work correctly with d3 v4.) 
+Visualization showing API server endpoints used by Kubernetes apps
 
-This example shows how it is possible to use a [D3 sunburst visualization](http://bl.ocks.org/mbostock/4063423) (partition layout) with data that describes sequences of events.
+To generate the data:
 
-A good use case is to summarize navigation paths through a web site, as in the sample synthetic data file (visit_sequences.csv). The visualization makes it easy to understand visits that start directly on a product page (e.g. after landing there from a search engine), compared to visits where users arrive on the site's home page and navigate from there. Where a funnel lets you understand a single pre-selected path, this allows you to see all possible paths.
+```python create_chart_csv.py```
 
-Features:
 
-* works with data that is in a CSV format (you don't need to pre-generate a hierarchical JSON file, unless your data file is very large) 
-* interactive breadcrumb trail helps to emphasize the sequence, so that it is easy for a first-time user to understand what they are seeing
-* percentages are shown explicitly, to help overcome the distortion of the data that occurs when using a radial presentation
-
-If you want to simply reuse this with your own data, here are some tips for generating the CSV file:
-
-* no header is required (but it's OK if one is present)
-* use a hyphen to separate the steps in the sequence
-* the step names should be one word only, and ideally should be kept short. Non-alphanumeric characters will probably cause problems (I haven't tested this).
-* every sequence should have an "end" marker as the last element, *unless* it has been truncated because it is longer than the maximum sequence length (6, in the example). The purpose of the "end" marker is to distinguish a true end point (e.g. the user left the site) from an end point that has been forced by truncation.
-* each line should be a complete path from root to leaf - don't include counts for intermediate steps. For example, include "home-search-end" and "home-search-product-end" but not "home-search" - the latter is computed by the partition layout, by adding up the counts of all the sequences with that prefix.
-* to keep the number of permutations low, use a small number of unique step names, and a small maximum sequence length. Larger numbers of either of these will lead to a very large CSV that will be slow to process (and therefore require pre-processing into hierarchical JSON).
-
-I created this example in my work at Google, but it is not part of any Google product. It is covered by the Apache license (see the LICENSE file).
+This visualization is based on kerryrodden's sunburst graph example, found [here](https://gist.github.com/kerryrodden/766f8f6d31f645c39f488a0befa1e3c8)
diff --git a/dev/audit-log-review/sunburst/index.html b/dev/audit-log-review/sunburst/index.html
@@ -21,7 +21,6 @@
       </div>
     </div>
     <div id="sidebar">
-      <!-- <input type="checkbox" id="togglelegend"> Legend<br/> -->
       <div id="legend" style="visibility: hidden;"></div>
     </div>
     <script type="text/javascript" src="sequences.js"></script>