|
| 1 | +# Reproducible Research: Peer Assessment 1 |
| 2 | + |
| 3 | + |
| 4 | +## Loading and preprocessing the data |
| 5 | + |
| 6 | +Read the original CSV file into a data frame and load the dplyr package for further analysis. |
| 7 | + |
| 8 | +```r |
| 9 | +require(dplyr) |
| 10 | +dfOrig <- read.csv("activity.csv", colClasses = c("integer", "Date", "integer")) |
| 11 | +dfOrig <- tbl_df(dfOrig) |
| 12 | +``` |
| 13 | + |
| 14 | +## What is mean total number of steps taken per day? |
| 15 | + |
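| 16 | +Calculate the total number of steps taken each day and store the result in a new data frame. Missing values are dropped before summing, so a day with only missing values gets a total of 0. |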
| 17 | + |
| 18 | +```r |
| 19 | +dfDay<- dfOrig %>% |
| 20 | + group_by(date) %>% |
| 21 | + summarize(totalSteps = sum(steps, na.rm = T)) |
| 22 | +``` |
| 23 | + |
| 24 | +Make a histogram |
| 25 | + |
| 26 | +```r |
| 27 | +hist(dfDay$totalSteps, |
| 28 | + main = "Historgram of total number of steps taken each day", |
| 29 | + xlab = "total steps taken per day") |
| 30 | +``` |
| 31 | + |
| 32 | + |
| 33 | + |
| 34 | +Calculate the mean and the median |
| 35 | + |
| 36 | +```r |
| 37 | +mean(dfDay$totalSteps) |
| 38 | +``` |
| 39 | + |
| 40 | +``` |
| 41 | +## [1] 9354.23 |
| 42 | +``` |
| 43 | + |
| 44 | +```r |
| 45 | +median(dfDay$totalSteps) |
| 46 | +``` |
| 47 | + |
| 48 | +``` |
| 49 | +## [1] 10395 |
| 50 | +``` |
| 51 | + |
| 52 | +## What is the average daily activity pattern? |
| 53 | + |
| 54 | +Calculate the average number of steps for each 5-minute interval, averaged across all days, and store the result in a new data frame. |
| 55 | + |
| 56 | +```r |
| 57 | +dfInterval <- dfOrig %>% |
| 58 | + group_by(interval) %>% |
| 59 | + summarize(avgSteps = mean(steps, na.rm =T)) |
| 60 | +``` |
| 61 | + |
| 62 | +Make a time series plot |
| 63 | + |
| 64 | +```r |
| 65 | +with(dfInterval, plot(interval, avgSteps, |
| 66 | + type = "l", |
| 67 | + main = "Average Number of Steps by 5-min Interval", |
| 68 | + xlab = "5-min interval", |
| 69 | + ylab = "avg. number of steps each day")) |
| 70 | +``` |
| 71 | + |
| 72 | + |
| 73 | + |
| 74 | +Identify the interval with the highest average number of steps. |
| 75 | + |
| 76 | +```r |
| 77 | +arrange(dfInterval, desc(avgSteps)) [1,] |
| 78 | +``` |
| 79 | + |
| 80 | +``` |
| 81 | +## Source: local data frame [1 x 2] |
| 82 | +## |
| 83 | +## interval avgSteps |
| 84 | +## 1 835 206.1698 |
| 85 | +``` |
| 86 | +The interval with the highest average number of steps is 835, i.e. the interval from 08:35 to 08:40. |
| 87 | + |
| 88 | +## Imputing missing values |
| 89 | + |
| 90 | +Use the `summary` function to find how many NA values the dataset contains. |
| 91 | + |
| 92 | +```r |
| 93 | +summary(dfOrig) |
| 94 | +``` |
| 95 | + |
| 96 | +``` |
| 97 | +## steps date interval |
| 98 | +## Min. : 0.00 Min. :2012-10-01 Min. : 0.0 |
| 99 | +## 1st Qu.: 0.00 1st Qu.:2012-10-16 1st Qu.: 588.8 |
| 100 | +## Median : 0.00 Median :2012-10-31 Median :1177.5 |
| 101 | +## Mean : 37.38 Mean :2012-10-31 Mean :1177.5 |
| 102 | +## 3rd Qu.: 12.00 3rd Qu.:2012-11-15 3rd Qu.:1766.2 |
| 103 | +## Max. :806.00 Max. :2012-11-30 Max. :2355.0 |
| 104 | +## NA's :2304 |
| 105 | +``` |
| 106 | + |
| 107 | + |
| 108 | +## Are there differences in activity patterns between weekdays and weekends? |
0 commit comments