This notebook processes a small set of data for a toy shiny app.

Set global options

# Global knitr chunk options: figure size, the directory prefix for saved
# figures, and whether code, warnings and messages appear in the output.
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned, whereas `TRUE` is a reserved word.
knitr::opts_chunk$set(
  fig.width = 12, fig.height = 8, fig.path = "figs/",
  echo = TRUE, warning = FALSE, message = FALSE
)

Load the data manipulation libraries

# Install any missing packages. `require()` returns FALSE (instead of
# erroring) when a package cannot be loaded, which is what triggers the
# install here.
if (!require("DT")) install.packages("DT")
if (!require("dtplyr")) install.packages("dtplyr")
if (!require("lubridate")) install.packages("lubridate")
if (!require("ggmap")) install.packages("ggmap")
if (!require("choroplethrZip")) {
  # choroplethrZip is installed from GitHub, which needs devtools. Install
  # devtools first when it is absent so that `library(devtools)` cannot fail
  # on a fresh machine.
  if (!requireNamespace("devtools", quietly = TRUE)) {
    install.packages("devtools")
  }
  library(devtools)
  install_github("arilamstein/choroplethrZip@v1.5.0")
}

# Attach the data manipulation libraries used by the rest of the notebook.
library(dtplyr)
library(dplyr)
library(DT)
library(lubridate)

Install shiny

# Install shiny when it cannot be loaded, attach it, and launch the
# "01_hello" example app.
if (!require("shiny")) {
  install.packages("shiny")
}
library(shiny)
runExample("01_hello")

Shiny tutorials can be found at http://shiny.rstudio.com/articles/.

Data for this tutorial

From NYC Open Data, we downloaded the 2009 housing sales data for Manhattan. The `datatable` function from the DT package is a great tool for exploring data in your R notebook or presentation.

# Read the raw Manhattan sales records.
mh2009 <- read.csv(file = "../data/ManhattanHousing.csv")

# Show an interactive table of 50 randomly drawn rows.
mh2009 %>%
  sample_n(50) %>%
  datatable()

# Keep rows with a positive ZIP code and add it as a character `region`
# column.
mh2009 <- mutate(
  filter(mh2009, ZIP.CODE > 0),
  region = as.character(ZIP.CODE)
)

Number of sales

# One row per region (ZIP code as character) with `value` = number of sales
# records in that region.
count.df <- summarise(group_by(mh2009, region), value = n())

# Persist the counts to the output directory.
save(count.df, file = "../output/count.RData")

Visualize using choroplethr

“A choropleth map (from Greek χώρο (“area/region”) + πλήθος (“multitude”)) is a thematic map in which areas are shaded or patterned in proportion to the measurement of the statistical variable being displayed on the map, such as population density or per-capita income.”

Install the package.

# CRAN packages: install when they cannot be loaded.
if (!require("choroplethr")) {
  install.packages("choroplethr")
}
if (!require("devtools")) {
  install.packages("devtools")
}

library(devtools)

# GitHub packages: install via devtools when they cannot be loaded.
if (!require("choroplethrZip")) {
  devtools::install_github("arilamstein/choroplethrZip@v1.5.0")
}
if (!require("ggplot2")) {
  devtools::install_github("hadley/ggplot2")
}
if (!require("ggmap")) {
  devtools::install_github("dkahle/ggmap")
}

2009 Manhattan housing sales

library(choroplethrZip)

# Draw the per-ZIP sales counts as a choropleth, zoomed to county 36061.
zip_choropleth(
  count.df,
  title = "2009 Manhattan housing sales",
  legend = "Number of sales",
  county_zoom = 36061
)

Geocode addresses

We can obtain more geo information about individual sales. You would need to sign up on Google Cloud to use the Geocoding API. Type ?register_google in RStudio and follow the steps.

library(ggmap)
library(dplyr)

# Geocode a random sample of 100 sales:
#   1. keep only the columns whose names start with "ADD",
#   2. build a full address string by appending the city and state,
#   3. geocode that string with ggmap,
#   4. drop every row that contains an NA.
mh2009.selgeo <- mh2009 %>%
  sample_n(100) %>%
  select(starts_with("ADD")) %>%
  mutate(ADDRESS_Ext = paste(ADDRESS, "New York, NY", sep = ",")) %>%
  mutate_geocode(ADDRESS_Ext) %>%
  na.omit()

Plot using longitude and latitude

library(ggmap)

# Black-and-white hybrid Google base map of Manhattan, with the geocoded
# sales overlaid as red points.
ggmap(
  get_map(
    "Manhattan",
    source = "google",
    maptype = "hybrid",
    zoom = 12,
    color = "bw"
  )
) +
  geom_point(
    data = mh2009.selgeo,
    mapping = aes(x = lon, y = lat),
    color = "red"
  )

Shiny app

A shiny app needs two files:

- `ui.r`: a user-interface script
- `server.r`: a server script (the actual analysis)

Prepare input data for the shiny app

# Derive the columns used by the shiny app, then keep only four building
# classes, sorted by class.
mh2009.use <- mh2009 %>%
  mutate(
    # Month number of the sale date (dates parsed as month/day/2-digit year).
    sale.month = month(as.Date(SALE.DATE, "%m/%d/%y")),
    # Zero prices and zero footage are recoded to NA.
    sale.price = ifelse(SALE.PRICE == 0, NA, SALE.PRICE),
    footage = ifelse(GROSS.SQUARE.FEET == 0, NA, GROSS.SQUARE.FEET),
    # Price per gross square foot; NA when either input is NA.
    unit.price = sale.price / footage,
    # First two characters of the building class category.
    bldg.type = substr(BUILDING.CLASS.CATEGORY, 1, 2)
  ) %>%
  filter(bldg.type %in% c("10", "13", "25", "28")) %>%
  arrange(bldg.type)

# Persist the prepared data to the output directory.
save(mh2009.use, file = "../output/mh2009use.RData")

Prepare input data for the shiny app: neighborhood-to-ZIP-code lookup

# Manhattan neighborhood names. Intended to be parallel to `zip.nbhd`:
# the i-th name goes with the i-th ZIP code vector.
man.nbhd <- c(
  "Central Harlem", "Chelsea and Clinton",
  "East Harlem", "Gramercy Park and Murray Hill",
  "Greenwich Village and Soho", "Lower Manhattan",
  "Lower East Side", "Upper East Side", "Upper West Side",
  "Inwood and Washington Heights"
)

# ZIP codes for each neighborhood, as an unnamed list of numeric vectors in
# the same order as `man.nbhd`. It is built as a single list literal because
# the previous `list(1:length(man.nbhd))` created a one-element list holding
# the vector 1:10 and only reached the right shape because `[[<-` grows the
# list on assignment.
zip.nbhd <- list(
  c(10026, 10027, 10030, 10037, 10039),        # Central Harlem
  c(10001, 10011, 10018, 10019, 10020),        # Chelsea and Clinton
  # NOTE(review): 10036 is usually listed under Chelsea and Clinton rather
  # than East Harlem -- confirm against the neighborhood table before moving.
  c(10036, 10029, 10035),                      # East Harlem
  c(10010, 10016, 10017, 10022),               # Gramercy Park and Murray Hill
  c(10012, 10013, 10014),                      # Greenwich Village and Soho
  c(10004, 10005, 10006, 10007, 10038, 10280), # Lower Manhattan
  c(10002, 10003, 10009),                      # Lower East Side
  c(10021, 10028, 10044, 10065, 10075, 10128), # Upper East Side
  c(10023, 10024, 10025),                      # Upper West Side
  c(10031, 10032, 10033, 10034, 10040)         # Inwood and Washington Heights
)