Tutorial 2: EDAV using shiny

This notebook processes a small set of data for a toy shiny app.

Set global options

# Global knitr chunk options: figure dimensions and output directory, plus
# echoing code while hiding warnings and messages in the rendered document.
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned.
knitr::opts_chunk$set(
  fig.width = 12, fig.height = 8, fig.path = "figs/",
  echo = TRUE, warning = FALSE, message = FALSE
)

Load the data manipulation libraries

# Install any missing packages. requireNamespace() only checks availability;
# it does not attach the package, so the search path is the same whether or
# not a package had to be installed first. Packages are attached explicitly
# with library() where they are used.
if (!requireNamespace("DT", quietly = TRUE)) {
  install.packages("DT")
}
if (!requireNamespace("dtplyr", quietly = TRUE)) {
  install.packages("dtplyr")
}
if (!requireNamespace("lubridate", quietly = TRUE)) {
  install.packages("lubridate")
}
if (!requireNamespace("ggmap", quietly = TRUE)) {
  install.packages("ggmap")
}
if (!requireNamespace("choroplethrZip", quietly = TRUE)) {
  # choroplethrZip is installed from GitHub, which needs devtools; install
  # devtools first if it is missing instead of assuming it is present.
  if (!requireNamespace("devtools", quietly = TRUE)) {
    install.packages("devtools")
  }
  devtools::install_github("arilamstein/choroplethrZip@v1.5.0")
}

# Attach the data manipulation packages used throughout the notebook.
library(dtplyr)
library(dplyr)
library(DT)
library(lubridate)

Install shiny

# Install shiny if it is not available, then attach it.
if (!requireNamespace("shiny", quietly = TRUE)) {
  install.packages("shiny")
}
library(shiny)

# runExample() launches a demo app and does not return until the app is
# stopped, so only start it in an interactive session (it would otherwise
# stall a non-interactive render of this notebook).
if (interactive()) {
  runExample("01_hello")
}

Shiny tutorials can be found at http://shiny.rstudio.com/articles/.

Data for this tutorial

From NYC open data, we downloaded 2009 housing sales data of Manhattan. datatable from the DT package is a great tool for exploring data in your R notebook or presentation.

# Read the Manhattan housing sales table.
mh2009 <- read.csv("../data/ManhattanHousing.csv")

# Show a random sample of 50 rows as a DT table.
mh2009 %>%
  sample_n(50) %>%
  datatable()

# Keep rows with a positive ZIP code and add `region`, the ZIP code stored as
# a character string.
mh2009 <- mutate(
  filter(mh2009, ZIP.CODE > 0),
  region = as.character(ZIP.CODE)
)

Number of sales

# Count the sales in each `region`; the count is stored in a column named
# `value`.
count.df <- summarise(group_by(mh2009, region), value = n())

# Write the counts to the output directory.
save(count.df, file = "../output/count.RData")

Visualize using `choroplethr`

“A choropleth map (from Greek χώρο (“area/region”) + πλήθος (“multitude”)) is a thematic map in which areas are shaded or patterned in proportion to the measurement of the statistical variable being displayed on the map, such as population density or per-capita income.”

Install the package.

# Install the mapping packages if they are missing. requireNamespace() checks
# availability without attaching, so the session ends up in the same state
# whether or not an install was needed; later chunks attach what they use.
if (!requireNamespace("choroplethr", quietly = TRUE)) {
  install.packages("choroplethr")
}
if (!requireNamespace("devtools", quietly = TRUE)) {
  install.packages("devtools")
}

library(devtools)

# The remaining packages are installed from GitHub via devtools.
if (!requireNamespace("choroplethrZip", quietly = TRUE)) {
  devtools::install_github("arilamstein/choroplethrZip@v1.5.0")
}
if (!requireNamespace("ggplot2", quietly = TRUE)) {
  devtools::install_github("hadley/ggplot2")
}
if (!requireNamespace("ggmap", quietly = TRUE)) {
  devtools::install_github("dkahle/ggmap")
}

2009 Manhattan housing sales

library(choroplethrZip)

# Choropleth of the per-ZIP sales counts in `count.df` (columns `region` and
# `value`). `county_zoom` restricts the map to county 36061 (Manhattan, per
# the plot title).
zip_choropleth(
  count.df,
  county_zoom = 36061,
  title = "2009 Manhattan housing sales",
  legend = "Number of sales"
)

Geocode addresses

We can obtain more geo information about individual sales. You would need to sign up on Google Cloud to use the Geocoding API. Type ?register_google in RStudio and follow the steps.

library(ggmap)
library(dplyr)

# Geocode a random sample of 100 sales: keep the columns whose names start
# with "ADD", build a full address string, look up its coordinates with
# mutate_geocode(), and drop any rows that contain missing values.
mh2009.selgeo <- mh2009 %>%
  sample_n(100) %>%
  select(starts_with("ADD")) %>%
  mutate(ADDRESS_Ext = paste(ADDRESS, "New York, NY", sep = ",")) %>%
  mutate_geocode(ADDRESS_Ext) %>%
  na.omit()

Plot using longitude and latitude

library(ggmap)

# Draw a black-and-white hybrid Google base map of Manhattan and overlay the
# geocoded sales as red points.
ggmap(
  get_map(
    location = "Manhattan",
    source = "google",
    maptype = "hybrid",
    zoom = 12,
    color = "bw"
  )
) +
  geom_point(
    mapping = aes(x = lon, y = lat),
    data = mh2009.selgeo,
    color = "red"
  )

Shiny app

A shiny app needs two files:

- `ui.r`, a user-interface script
- `server.r`, a server script (the actual analysis)

Prepare input data for the shiny app

# Derive the columns used by the shiny app. Columns inside a single mutate()
# are created in order, so `unit.price` can use the `sale.price` and `footage`
# defined just above it.
mh2009.use <- mh2009 %>%
  mutate(
    # Month of sale, parsed from the SALE.DATE text.
    sale.month = month(as.Date(SALE.DATE, "%m/%d/%y")),
    # Zero prices and zero footage are recoded as missing.
    sale.price = ifelse(SALE.PRICE == 0, NA, SALE.PRICE),
    footage = ifelse(GROSS.SQUARE.FEET == 0, NA, GROSS.SQUARE.FEET),
    unit.price = sale.price / footage,
    # Building type is the first two characters of the category label.
    bldg.type = substr(BUILDING.CLASS.CATEGORY, 1, 2)
  ) %>%
  # Keep only the four building types of interest, ordered by type.
  filter(bldg.type %in% c("10", "13", "25", "28")) %>%
  arrange(bldg.type)

save(mh2009.use, file = "../output/mh2009use.RData")

Prepare the neighborhood-to-ZIP-code lookup for the shiny app

# Manhattan neighborhood names. `zip.nbhd[[i]]` holds the ZIP codes that
# belong to `man.nbhd[i]`, so the two objects must stay in the same order.
man.nbhd <- c(
  "Central Harlem", "Chelsea and Clinton",
  "East Harlem", "Gramercy Park and Murray Hill",
  "Greenwich Village and Soho", "Lower Manhattan",
  "Lower East Side", "Upper East Side", "Upper West Side",
  "Inwood and Washington Heights"
)

# ZIP codes per neighborhood, built in one step as an unnamed list.
# ZIP 10036 is listed under Chelsea and Clinton (not East Harlem), following
# the NYS Department of Health neighborhood ZIP code definitions.
zip.nbhd <- list(
  c(10026, 10027, 10030, 10037, 10039),        # Central Harlem
  c(10001, 10011, 10018, 10019, 10020, 10036), # Chelsea and Clinton
  c(10029, 10035),                             # East Harlem
  c(10010, 10016, 10017, 10022),               # Gramercy Park and Murray Hill
  c(10012, 10013, 10014),                      # Greenwich Village and Soho
  c(10004, 10005, 10006, 10007, 10038, 10280), # Lower Manhattan
  c(10002, 10003, 10009),                      # Lower East Side
  c(10021, 10028, 10044, 10065, 10075, 10128), # Upper East Side
  c(10023, 10024, 10025),                      # Upper West Side
  c(10031, 10032, 10033, 10034, 10040)         # Inwood and Washington Heights
)

# Guard against the two objects drifting out of sync.
stopifnot(length(zip.nbhd) == length(man.nbhd))

Tutorial 2: EDAV using shiny

Tian Zheng

September 18, 2018

Set global options

Load the data manipulation libraries

Install shiny

Data for this tutorial

Number of sales

Visualize using `choroplethr`

Install the package.

2009 Manhattan housing sales

Geocode addresses

Plot using longitude and latitude

Shiny app

Prepare input data for the shiny app

Prepare input data for the shiny app

Tutorial 2: EDAV using shiny

Tian Zheng

September 18, 2018

Set global options

Load the data manipulation libraries

Install shiny

Data for this tutorial

Number of sales

Visualize using choroplethr

Install the package.

2009 Manhattan housing sales

Geocode addresses

Plot using longitude and latitude

Shiny app

Prepare input data for the shiny app

Prepare input data for the shiny app

Visualize using `choroplethr`