Skip to content

Timeout&web page refresh #14

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 8 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions MANIFEST.in
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
include README.md
include gpuview/views/index.tpl
include gpuview/views/body.tpl
include gpuview/service.sh
18 changes: 12 additions & 6 deletions gpuview/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,15 +24,19 @@

EXCLUDE_SELF = False # Do not report to `/gpustat` calls.

REFRESH_TIME = 5

@app.route('/')
def index():
gpustats = core.all_gpustats()
now = datetime.now().strftime('Updated at %Y-%m-%d %H-%M-%S')
return template('index', gpustats=gpustats, update_time=now)
return template('index', update_time=REFRESH_TIME)

@app.route('/update', method='GET')
def update():
    """
    Renders the dashboard body with freshly collected gpustats.

    The stats are gathered first via `core.all_gpustats`, using the
    module-level `REFRESH_TIME` as its argument, and the timestamp is
    taken afterwards so it reflects when the data became available.

    Returns:
        The rendered 'body' template. Note that the template variable
        `refresh_time` carries the formatted "Updated at ..." timestamp
        string, not the refresh interval.
    """
    collected = core.all_gpustats(REFRESH_TIME)
    stamp = datetime.now().strftime('Updated at %Y-%m-%d %H-%M-%S')
    return template('body', refresh_time=stamp, gpustats=collected)

@app.route('/gpustat', methods=['GET'])
@app.route('/gpustat', method='GET')
def report_gpustat():
"""
Returns the gpustat of this host.
Expand All @@ -59,14 +63,16 @@ def main():

if 'run' == args.action:
core.safe_zone(args.safe_zone)
global EXCLUDE_SELF
global EXCLUDE_SELF, REFRESH_TIME
EXCLUDE_SELF = args.exclude_self
REFRESH_TIME = args.refresh_time
app.run(host=args.host, port=args.port, debug=args.debug)
elif 'service' == args.action:
core.install_service(host=args.host,
port=args.port,
safe_zone=args.safe_zone,
exclude_self=args.exclude_self)
exclude_self=args.exclude_self,
refresh_time=args.refresh_time)
elif 'add' == args.action:
core.add_host(args.url, args.name)
elif 'remove' == args.action:
Expand Down
44 changes: 28 additions & 16 deletions gpuview/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,14 @@
"""

import os
import json
import subprocess
import asyncio
import aiohttp
try:
from urllib.request import urlopen
except ImportError:
from urllib2 import urlopen


ABS_PATH = os.path.dirname(os.path.realpath(__file__))
HOSTS_DB = os.path.join(ABS_PATH, 'gpuhosts.db')
SAFE_ZONE = False # Safe to report all details.
Expand Down Expand Up @@ -80,8 +80,18 @@ def my_gpustat():
except Exception as e:
return {'error': '%s!' % getattr(e, 'message', str(e))}

async def async_fetch_gpustat(session, url):
    """
    Fetches the gpustat report of a single remote host.

    Args:
        session: An open HTTP client session whose `get()` returns an
            async context manager yielding a response object with an
            awaitable `json()` method.
        url: Base URL of the remote host; '/gpustat' is appended to it.
            A trailing slash on the base URL is tolerated.

    Returns:
        dict: The decoded payload when it is a JSON object containing a
        'gpus' key. None when the payload is empty, is not an object,
        lacks 'gpus', or when the request/decoding fails.
    """
    # Avoid producing '<base>//gpustat' when the host was registered
    # with a trailing slash.
    endpoint = url.rstrip('/') + '/gpustat'
    try:
        async with session.get(endpoint) as response:
            gpustat = await response.json()
        # Only JSON objects are usable downstream: the caller assigns
        # result['hostname'], which would fail on a list or a string.
        if isinstance(gpustat, dict) and 'gpus' in gpustat:
            return gpustat
    except Exception as e:
        # Best effort: one unreachable host must not break the
        # aggregation of all the others, so report and carry on.
        print('Error: %s getting gpustat from %s' %
              (getattr(e, 'message', str(e)), url))
    return None


def all_gpustats():
async def async_all_gpustats(int_timeout):
"""
Aggregates the gpustats of all registered hosts and this host.

Expand All @@ -95,19 +105,16 @@ def all_gpustats():
gpustats.append(mystat)

hosts = load_hosts()
for url in hosts:
try:
raw_resp = urlopen(url + '/gpustat')
gpustat = json.loads(raw_resp.read())
raw_resp.close()
if not gpustat or 'gpus' not in gpustat:
continue
timeout = aiohttp.ClientTimeout(total=int_timeout*0.9)
async with aiohttp.ClientSession(timeout=timeout) as session:
tasks = [async_fetch_gpustat(session, url) for url in hosts]
results = await asyncio.gather(*tasks)

for result, url in zip(results, hosts):
if result:
if hosts[url] != url:
gpustat['hostname'] = hosts[url]
gpustats.append(gpustat)
except Exception as e:
print('Error: %s getting gpustat from %s' %
(getattr(e, 'message', str(e)), url))
result['hostname'] = hosts[url]
gpustats.append(result)

try:
sorted_gpustats = sorted(gpustats, key=lambda g: g['hostname'])
Expand All @@ -117,6 +124,8 @@ def all_gpustats():
print("Error: %s" % getattr(e, 'message', str(e)))
return gpustats

def all_gpustats(timeout):
    """
    Synchronous entry point for collecting the gpustats of all hosts.

    Builds the `async_all_gpustats` coroutine for the given timeout and
    drives it to completion with `asyncio.run`.

    Args:
        timeout: Value forwarded unchanged to `async_all_gpustats`.

    Returns:
        Whatever `async_all_gpustats` returns.
    """
    pending = async_all_gpustats(timeout)
    return asyncio.run(pending)

def load_hosts():
"""
Expand Down Expand Up @@ -176,7 +185,8 @@ def print_hosts():


def install_service(host=None, port=None,
safe_zone=False, exclude_self=False):
safe_zone=False, exclude_self=False,
refresh_time=None):
arg = ''
if host is not None:
arg += '--host %s ' % host
Expand All @@ -186,5 +196,7 @@ def install_service(host=None, port=None,
arg += '--safe-zone '
if exclude_self:
arg += '--exclude-self '
if refresh_time is not None:
arg += '--refresh-time %s' % refresh_time
script = os.path.join(ABS_PATH, 'service.sh')
subprocess.call('{} "{}"'.format(script, arg.strip()), shell=True)
2 changes: 2 additions & 0 deletions gpuview/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,8 @@ def arg_parser():
help="Report all details including usernames")
base_parser.add_argument('--exclude-self', action='store_true',
help="Don't report to others but self-dashboard")
base_parser.add_argument('--refresh-time', type=int, default=5,
help="Gpuview refresh time (default: 5 [sec])")

run_parser = subparsers.add_parser("run", parents=[base_parser],
help="Run gpuview server")
Expand Down
113 changes: 113 additions & 0 deletions gpuview/views/body.tpl
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
<nav class="navbar navbar-expand-lg navbar-dark bg-dark fixed-top" id="mainNav">
<a class="navbar-brand" href="index.html">gpuview dashboard</a>
<button class="navbar-toggler navbar-toggler-right" type="button" data-toggle="collapse"
data-target="#navbarResponsive"
aria-controls="navbarResponsive" aria-expanded="false" aria-label="Toggle navigation">
<span class="navbar-toggler-icon"></span>
</button>
<div class="collapse navbar-collapse" id="navbarResponsive">
<ul class="navbar-nav navbar-sidenav" id="exampleAccordion">
<li class="nav-item" data-toggle="tooltip" data-placement="right" title="Table">
<!-- a class="nav-link" href="#table">
<i class="fas fa-table"></i>
<span class="nav-link-text">Table</span>
</a -->
</li>
</ul>
</div>
</nav>
<div class="content-wrapper">
<div class="container-fluid" style="padding: 70px 40px 40px 40px">
<div class="row">
% for gpustat in gpustats:
% for gpu in gpustat.get('gpus', []):
<div class="col-xl-3 col-md-4 col-sm-6 mb-3">
<div class="card text-white {{ gpu.get('flag', '') }} o-hidden h-100">
<div class="card-body">
<div class="float-left">
<div class="card-body-icon">
<i class="fa fa-server"></i> <b>{{ gpustat.get('hostname', '-') }}</b>
</div>
<div>[{{ gpu.get('index', '') }}] {{ gpu.get('name', '-') }}</div>
</div>
</div>
<div class="card-footer text-white clearfix small z-1">
<span class="float-left">
<span class="text-nowrap">
<i class="fa fa-thermometer-three-quarters" aria-hidden="true"></i>
Temp. {{ gpu.get('temperature.gpu', '-') }}&#8451;
</span> |
<span class="text-nowrap">
<i class="fa fa-microchip" aria-hidden="true"></i>
Mem. {{ gpu.get('memory', '-') }}%
</span> |
<span class="text-nowrap">
<i class="fa fa-cogs" aria-hidden="true"></i>
Util. {{ gpu.get('utilization.gpu', '-') }}%
</span> |
<span class="text-nowrap">
<i class="fa fa-users" aria-hidden="true"></i>
{{ gpu.get('users', '-') }}
</span>
</span>
</div>
</div>
</div>
% end
% end
</div>
<!-- GPU Stat Card-->
<div class="card mb-3">
<div class="card-header">
<i class="fa fa-table"></i> All Hosts and GPUs</div>
<div class="card-body">
<div class="table-responsive">
<table class="table table-bordered" id="dataTable" width="100%" cellspacing="0">
<thead>
<tr>
<th scope="col">Host</th>
<th scope="col">GPU</th>
<th scope="col">Temp.</th>
<th scope="col">Util.</th>
<th scope="col">Memory Use/Cap</th>
<th scope="col">Power Use/Cap</th>
<th scope="col">User Processes</th>
</tr>
</thead>
<tbody>
% for gpustat in gpustats:
% for gpu in gpustat.get('gpus', []):
<tr class="small" id="{{ gpustat.get('hostname', '-') }}">
<th scope="row">{{ gpustat.get('hostname', '-') }} </th>
<td> [{{ gpu.get('index', '') }}] {{ gpu.get('name', '-') }} </td>
<td> {{ gpu.get('temperature.gpu', '-') }}&#8451; </td>
<td> {{ gpu.get('utilization.gpu', '-') }}% </td>
<td> {{ gpu.get('memory', '-') }}% ({{ gpu.get('memory.used', '') }}/{{ gpu.get('memory.total', '-') }}) </td>
<td> {{ gpu.get('power.draw', '-') }} / {{ gpu.get('enforced.power.limit', '-') }} </td>
<td> {{ gpu.get('user_processes', '-') }} </td>
</tr>
% end
% end
</tbody>
</table>
</div>
</div>
<div class="card-footer small text-muted">{{ refresh_time }}</div>
</div>
<footer class="sticky-footer">
<div class="container">
<div class="text-center text-white">
<small><a href='https://github.com/fgaim/gpuview'>gpuview</a> © 2018</small>
</div>
</div>
</footer>
</div>
<script src="https://code.jquery.com/jquery-3.3.1.min.js"
integrity="sha256-FgpCb/KJQlLNfOu91ta32o/NMZxltwRo8QtmkMRdAu8="
crossorigin="anonymous"></script>
<script src="https://maxcdn.bootstrapcdn.com/bootstrap/4.0.0/js/bootstrap.min.js"
integrity="sha384-JZR6Spejh4U02d8jOt6vLEHfe/JQGiRRSQQxSfFWpi1MquVdAyjUar5+76PVCmYl"
crossorigin="anonymous"></script>
<script src="https://cdn.datatables.net/1.10.16/js/jquery.dataTables.min.js"></script>
<script src="https://cdn.datatables.net/1.10.16/js/dataTables.bootstrap4.min.js"></script>
</div>
Loading