|
| 1 | +<!doctype html> |
| 2 | +<html lang="en"> |
| 3 | + <head> |
| 4 | + <meta charset="utf-8"> |
| 5 | + <meta http-equiv="X-UA-Compatible" content="IE=edge"> |
| 6 | + <meta http-equiv="last-modified" content="2017-02-08 23:51:37 +0100"> |
| 7 | + <meta name="viewport" content="width=device-width, initial-scale=1"> |
| 8 | + <!-- meta "search-domain" used for google site search function google_search() --> |
| 9 | + <meta name="search-domain" value="/swc-releases/2017.02/python-novice-inflammation"> |
| 10 | + <link rel="stylesheet" type="text/css" href="../assets/css/bootstrap.css" /> |
| 11 | + <link rel="stylesheet" type="text/css" href="../assets/css/bootstrap-theme.css" /> |
| 12 | + <link rel="stylesheet" type="text/css" href="../assets/css/lesson.css" /> |
| 13 | + |
| 14 | + <link rel="shortcut icon" type="image/x-icon" href="/favicon-swc.ico" /> |
| 15 | + |
| 16 | + |
| 17 | + <!-- HTML5 shim and Respond.js for IE8 support of HTML5 elements and media queries --> |
| 18 | + <!-- WARNING: Respond.js doesn't work if you view the page via file:// --> |
| 19 | + <!--[if lt IE 9]> |
| 20 | + <script src="https://oss.maxcdn.com/html5shiv/3.7.2/html5shiv.min.js"></script> |
| 21 | + <script src="https://oss.maxcdn.com/respond/1.4.2/respond.min.js"></script> |
| 22 | + <![endif]--> |
| 23 | + <title>Programming with Python: Analyzing Data from Multiple Files</title> |
| 24 | + </head> |
| 25 | + <body> |
| 26 | + <div class="container"> |
| 27 | + <nav class="navbar navbar-default"> |
| 28 | + <div class="container-fluid"> |
| 29 | + <div class="navbar-header"> |
| 30 | + <button type="button" class="navbar-toggle collapsed" data-toggle="collapse" data-target="#bs-example-navbar-collapse-1" aria-expanded="false"> |
| 31 | + <span class="sr-only">Toggle navigation</span> |
| 32 | + <span class="icon-bar"></span> |
| 33 | + <span class="icon-bar"></span> |
| 34 | + <span class="icon-bar"></span> |
| 35 | + </button> |
| 36 | + |
| 37 | + |
| 38 | + |
| 39 | + <a href="https://software-carpentry.org" class="pull-left"> |
| 40 | + <img class="navbar-logo" src="../assets/img/swc-icon-blue.svg" alt="Software Carpentry logo" /> |
| 41 | + </a> |
| 42 | + |
| 43 | + |
| 44 | + |
| 45 | + <a class="navbar-brand" href="../">Home</a> |
| 46 | + |
| 47 | + </div> |
| 48 | + <div class="collapse navbar-collapse" id="bs-example-navbar-collapse-1"> |
| 49 | + <ul class="nav navbar-nav"> |
| 50 | + |
| 51 | + |
| 52 | + <li><a href="../conduct/">Code of Conduct</a></li> |
| 53 | + |
| 54 | + |
| 55 | + |
| 56 | + <li><a href="../setup/">Setup</a></li> |
| 57 | + <li><a href="../reference/">Reference</a></li> |
| 58 | + <li class="dropdown"> |
| 59 | + <a href="../" class="dropdown-toggle" data-toggle="dropdown" role="button" aria-haspopup="true" aria-expanded="false">Episodes <span class="caret"></span></a> |
| 60 | + <ul class="dropdown-menu"> |
| 61 | + |
| 62 | + <li><a href="../01-numpy/">Analyzing Patient Data</a></li> |
| 63 | + |
| 64 | + <li><a href="../02-loop/">Repeating Actions with Loops</a></li> |
| 65 | + |
| 66 | + <li><a href="../03-lists/">Storing Multiple Values in Lists</a></li> |
| 67 | + |
| 68 | + <li><a href="../04-files/">Analyzing Data from Multiple Files</a></li> |
| 69 | + |
| 70 | + <li><a href="../05-cond/">Making Choices</a></li> |
| 71 | + |
| 72 | + <li><a href="../06-func/">Creating Functions</a></li> |
| 73 | + |
| 74 | + <li><a href="../07-errors/">Errors and Exceptions</a></li> |
| 75 | + |
| 76 | + <li><a href="../08-defensive/">Defensive Programming</a></li> |
| 77 | + |
| 78 | + <li><a href="../09-debugging/">Debugging</a></li> |
| 79 | + |
| 80 | + <li><a href="../10-cmdline/">Command-Line Programs</a></li> |
| 81 | + |
| 82 | + </ul> |
| 83 | + </li> |
| 84 | + |
| 85 | + |
| 86 | + |
| 87 | + |
| 88 | + <li class="dropdown"> |
| 89 | + <a href="../" class="dropdown-toggle" data-toggle="dropdown" role="button" aria-haspopup="true" aria-expanded="false">Extras <span class="caret"></span></a> |
| 90 | + <ul class="dropdown-menu"> |
| 91 | + |
| 92 | + <li><a href="../about/">About</a></li> |
| 93 | + |
| 94 | + <li><a href="../discuss/">Discussion</a></li> |
| 95 | + |
| 96 | + <li><a href="../figures/">Figures</a></li> |
| 97 | + |
| 98 | + <li><a href="../guide/">Instructor Notes</a></li> |
| 99 | + |
| 100 | + </ul> |
| 101 | + </li> |
| 102 | + |
| 103 | + |
| 104 | + |
| 105 | + <li><a href="../license/">License</a></li> |
| 106 | + </ul> |
| 107 | + <form class="navbar-form navbar-right" role="search" id="search" onsubmit="google_search(); return false;"> |
| 108 | + <div class="form-group"> |
| 109 | + <input type="text" id="google-search" placeholder="Search..." aria-label="Google site search"> |
| 110 | + </div> |
| 111 | + </form> |
| 112 | + </div> |
| 113 | + </div> |
| 114 | +</nav> |
| 115 | + |
| 116 | + |
| 117 | + |
| 118 | + |
| 119 | + |
| 120 | +<div class="row"> |
| 121 | + <div class="col-md-1"> |
| 122 | + <h3> |
| 123 | + |
| 124 | + <a href="../03-lists/"><span class="glyphicon glyphicon-menu-left" aria-hidden="true"></span><span class="sr-only">previous episode</span></a> |
| 125 | + |
| 126 | + </h3> |
| 127 | + </div> |
| 128 | + <div class="col-md-10"> |
| 129 | + |
| 130 | + <h3 class="maintitle"><a href="../">Programming with Python</a></h3> |
| 131 | + <h1 class="maintitle">Analyzing Data from Multiple Files</h1> |
| 132 | + |
| 133 | + </div> |
| 134 | + <div class="col-md-1"> |
| 135 | + <h3> |
| 136 | + |
| 137 | + <a href="../05-cond/"><span class="glyphicon glyphicon-menu-right" aria-hidden="true"></span><span class="sr-only">next episode</span></a> |
| 138 | + |
| 139 | + </h3> |
| 140 | + </div> |
| 141 | +</div> |
| 142 | + |
| 143 | + |
| 144 | +<blockquote class="objectives"> |
| 145 | + <h2>Overview</h2> |
| 146 | + |
| 147 | + <div class="row"> |
| 148 | + <div class="col-md-3"> |
| 149 | + <strong>Teaching:</strong> 20 min |
| 150 | + <br/> |
| 151 | + <strong>Exercises:</strong> 0 min |
| 152 | + </div> |
| 153 | + <div class="col-md-9"> |
| 154 | + <strong>Questions</strong> |
| 155 | + <ul> |
| 156 | + |
| 157 | + <li><p>How can I do the same operations on many different files?</p> |
| 158 | +</li> |
| 159 | + |
| 160 | + </ul> |
| 161 | + </div> |
| 162 | + </div> |
| 163 | + |
| 164 | + <div class="row"> |
| 165 | + <div class="col-md-3"> |
| 166 | + </div> |
| 167 | + <div class="col-md-9"> |
| 168 | + <strong>Objectives</strong> |
| 169 | + <ul> |
| 170 | + |
| 171 | + <li><p>Use a library function to get a list of filenames that match a simple wildcard pattern.</p> |
| 172 | +</li> |
| 173 | + |
| 174 | + <li><p>Write a for loop to process multiple files.</p> |
| 175 | +</li> |
| 176 | + |
| 177 | + </ul> |
| 178 | + </div> |
| 179 | + </div> |
| 180 | + |
| 181 | +</blockquote> |
| 182 | + |
| 183 | +<p>We now have almost everything we need to process all our data files. |
| 184 | +The only thing that’s missing is a library with a rather unpleasant name:</p> |
| 185 | + |
| 186 | +<div class="python highlighter-rouge"><pre class="highlight"><code>import glob |
| 187 | +</code></pre> |
| 188 | +</div> |
| 189 | + |
| 190 | +<p>The <code class="highlighter-rouge">glob</code> library contains a function, also called <code class="highlighter-rouge">glob</code>, |
| 191 | +that finds files and directories whose names match a pattern. |
| 192 | +We provide those patterns as strings: |
| 193 | +the character <code class="highlighter-rouge">*</code> matches zero or more characters, |
| 194 | +while <code class="highlighter-rouge">?</code> matches any one character. |
| 195 | +We can use this to get the names of all the inflammation CSV files in the <code class="highlighter-rouge">data</code> directory:</p> |
| 196 | + |
| 197 | +<div class="python highlighter-rouge"><pre class="highlight"><code>print(glob.glob('data/inflammation*.csv')) |
| 198 | +</code></pre> |
| 199 | +</div> |
| 200 | + |
| 201 | +<div class="output highlighter-rouge"><pre class="highlight"><code>['data/inflammation-05.csv', 'data/inflammation-11.csv', 'data/inflammation-12.csv', 'data/inflammation-08.csv', 'data/inflammation-03.csv', 'data/inflammation-06.csv', 'data/inflammation-09.csv', 'data/inflammation-07.csv', 'data/inflammation-10.csv', 'data/inflammation-02.csv', 'data/inflammation-04.csv', 'data/inflammation-01.csv'] |
| 202 | +</code></pre> |
| 203 | +</div> |
| 204 | + |
| 205 | +<p>As this example shows, |
| 206 | +<code class="highlighter-rouge">glob.glob</code>’s result is a list of file and directory paths in arbitrary order. |
| 207 | +This means we can loop over it |
| 208 | +to do something with each filename in turn. |
| 209 | +In our case, |
| 210 | +the “something” we want to do is generate a set of plots for each file in our inflammation dataset. |
| 211 | +If we want to start by analyzing just the first three files in alphabetical order, we can use the <code class="highlighter-rouge">sorted</code> built-in function to generate a new sorted list from the <code class="highlighter-rouge">glob.glob</code> output:</p> |
| 212 | + |
| 213 | +<div class="python highlighter-rouge"><pre class="highlight"><code>import numpy |
| 214 | +import matplotlib.pyplot |
| 215 | + |
| 216 | +filenames = sorted(glob.glob('data/inflammation*.csv')) |
| 217 | +filenames = filenames[0:3] |
| 218 | +for f in filenames: |
| 219 | + print(f) |
| 220 | + |
| 221 | + data = numpy.loadtxt(fname=f, delimiter=',') |
| 222 | + |
| 223 | + fig = matplotlib.pyplot.figure(figsize=(10.0, 3.0)) |
| 224 | + |
| 225 | + axes1 = fig.add_subplot(1, 3, 1) |
| 226 | + axes2 = fig.add_subplot(1, 3, 2) |
| 227 | + axes3 = fig.add_subplot(1, 3, 3) |
| 228 | + |
| 229 | + axes1.set_ylabel('average') |
| 230 | + axes1.plot(numpy.mean(data, axis=0)) |
| 231 | + |
| 232 | + axes2.set_ylabel('max') |
| 233 | + axes2.plot(numpy.max(data, axis=0)) |
| 234 | + |
| 235 | + axes3.set_ylabel('min') |
| 236 | + axes3.plot(numpy.min(data, axis=0)) |
| 237 | + |
| 238 | + fig.tight_layout() |
| 239 | + matplotlib.pyplot.show() |
| 240 | +</code></pre> |
| 241 | +</div> |
| 242 | + |
| 243 | +<div class="output highlighter-rouge"><pre class="highlight"><code>data/inflammation-01.csv |
| 244 | +</code></pre> |
| 245 | +</div> |
| 246 | + |
| 247 | +<p><img src="../fig/03-loop_49_1.png" alt="Analysis of inflammation-01.csv" /></p> |
| 248 | + |
| 249 | +<div class="output highlighter-rouge"><pre class="highlight"><code>data/inflammation-02.csv |
| 250 | +</code></pre> |
| 251 | +</div> |
| 252 | + |
| 253 | +<p><img src="../fig/03-loop_49_3.png" alt="Analysis of inflammation-02.csv" /></p> |
| 254 | + |
| 255 | +<div class="output highlighter-rouge"><pre class="highlight"><code>data/inflammation-03.csv |
| 256 | +</code></pre> |
| 257 | +</div> |
| 258 | + |
| 259 | +<p><img src="../fig/03-loop_49_5.png" alt="Analysis of inflammation-03.csv" /></p> |
| 260 | + |
| 261 | +<p>Sure enough, |
| 262 | +the maxima of the first two datasets show exactly the same ramp as the first, |
| 263 | +and their minima show the same staircase structure; |
| 264 | +a different situation has been revealed in the third dataset, |
| 265 | +where the maxima are a bit less regular, but the minima are consistently zero.</p> |
| 266 | + |
| 267 | +<blockquote class="challenge"> |
| 268 | + <h2 id="plotting-differences">Plotting Differences</h2> |
| 269 | + |
| 270 | + <p>Plot the difference between the average of the first dataset |
| 271 | +and the average of the second dataset, |
| 272 | +i.e., the difference between the leftmost plots of the first two figures.</p> |
| 273 | + |
| 274 | + <blockquote class="solution"> |
| 275 | + <h2 id="solution">Solution</h2> |
| 276 | + <div class="python highlighter-rouge"><pre class="highlight"><code>import glob |
| 277 | +import numpy |
| 278 | +import matplotlib.pyplot |
| 279 | + |
| 280 | +filenames = sorted(glob.glob('data/inflammation*.csv')) |
| 281 | + |
| 282 | +data0 = numpy.loadtxt(fname=filenames[0], delimiter=',') |
| 283 | +data1 = numpy.loadtxt(fname=filenames[1], delimiter=',') |
| 284 | + |
| 285 | +fig = matplotlib.pyplot.figure(figsize=(10.0, 3.0)) |
| 286 | + |
| 287 | +matplotlib.pyplot.ylabel('Difference in average') |
| 288 | +matplotlib.pyplot.plot(data0.mean(axis=0) - data1.mean(axis=0)) |
| 289 | + |
| 290 | +fig.tight_layout() |
| 291 | +matplotlib.pyplot.show() |
| 292 | +</code></pre> |
| 293 | + </div> |
| 294 | + </blockquote> |
| 295 | +</blockquote> |
| 296 | + |
| 297 | +<blockquote class="challenge"> |
| 298 | + <h2 id="generate-composite-statistics">Generate Composite Statistics</h2> |
| 299 | + |
| 300 | + <p>Use each of the files once to generate a dataset containing values averaged over all patients:</p> |
| 301 | + |
| 302 | + <div class="python highlighter-rouge"><pre class="highlight"><code>filenames = glob.glob('data/inflammation*.csv') |
| 303 | +composite_data = numpy.zeros((60,40)) |
| 304 | +for f in filenames: |
| 305 | + # sum each new file's data into composite_data as it's read |
| 306 | +# and then divide the composite_data by number of samples |
| 307 | +composite_data /= len(filenames) |
| 308 | +</code></pre> |
| 309 | + </div> |
| 310 | + |
| 311 | + <p>Then use pyplot to generate average, max, and min plots for all patients.</p> |
| 312 | + |
| 313 | +</blockquote> |
| 314 | + |
| 315 | +<blockquote class="keypoints"> |
| 316 | + <h2>Key Points</h2> |
| 317 | + <ul> |
| 318 | + |
| 319 | + <li><p>Use <code class="highlighter-rouge">glob.glob(pattern)</code> to create a list of files whose names match a pattern.</p> |
| 320 | +</li> |
| 321 | + |
| 322 | + <li><p>Use <code class="highlighter-rouge">*</code> in a pattern to match zero or more characters, and <code class="highlighter-rouge">?</code> to match any single character.</p> |
| 323 | +</li> |
| 324 | + |
| 325 | + </ul> |
| 326 | +</blockquote> |
| 327 | + |
| 328 | + |
| 329 | + |
| 330 | + |
| 331 | + |
| 332 | +<div class="row"> |
| 333 | + <div class="col-md-1"> |
| 334 | + <h3> |
| 335 | + |
| 336 | + <a href="../03-lists/"><span class="glyphicon glyphicon-menu-left" aria-hidden="true"></span><span class="sr-only">previous episode</span></a> |
| 337 | + |
| 338 | + </h3> |
| 339 | + </div> |
| 340 | + <div class="col-md-10"> |
| 341 | + |
| 342 | + </div> |
| 343 | + <div class="col-md-1"> |
| 344 | + <h3> |
| 345 | + |
| 346 | + <a href="../05-cond/"><span class="glyphicon glyphicon-menu-right" aria-hidden="true"></span><span class="sr-only">next episode</span></a> |
| 347 | + |
| 348 | + </h3> |
| 349 | + </div> |
| 350 | +</div> |
| 351 | + |
| 352 | + |
| 353 | + |
| 354 | + <footer> |
| 355 | + <div class="row"> |
| 356 | + <div class="col-md-6" align="left"> |
| 357 | + <h4> |
| 358 | + Copyright © 2016 |
| 359 | + <a href="https://software-carpentry.org">Software Carpentry Foundation</a> |
| 360 | + |
| 361 | + </h4> |
| 362 | + </div> |
| 363 | + <div class="col-md-6" align="right"> |
| 364 | + <h4> |
| 365 | + <a href="/">Source</a> |
| 366 | + / |
| 367 | + <a href="/blob/gh-pages/CONTRIBUTING.md">Contributing</a> |
| 368 | + / |
| 369 | + <a href=" mailto:[email protected]" >Contact </a> |
| 370 | + </h4> |
| 371 | + </div> |
| 372 | + </div> |
| 373 | +</footer> |
| 374 | + |
| 375 | + |
| 376 | + </div> |
| 377 | + <script src="../assets/js/jquery.min.js"></script> |
| 378 | +<script src="../assets/js/bootstrap.min.js"></script> |
| 379 | +<script src="../assets/js/lesson.js"></script> |
| 380 | +<script> |
| 381 | + (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){ |
| 382 | + (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o), |
| 383 | + m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m) |
| 384 | + })(window,document,'script','https://www.google-analytics.com/analytics.js','ga'); |
| 385 | + ga('create', 'UA-37305346-2', 'auto'); |
| 386 | + ga('send', 'pageview'); |
| 387 | +</script> |
| 388 | + |
| 389 | + </body> |
| 390 | +</html> |
0 commit comments