Introduction: The goal of this project is to build a simple system that reads and processes crime record data from the Baltimore Police Department and visualizes crime location patterns according to user-specified conditions.
Download, unzip and read the shape file:
library(maps)
library(maptools)
## Loading required package: sp
## Checking rgeos availability: FALSE
## Note: when rgeos is not available, polygon geometry computations in maptools depend on gpclib,
## which has a restricted licence. It is disabled by default;
## to enable gpclib, type gpclibPermit()
# Fetch the school-district shapefile archive (only when it is not already on
# disk), extract it, and load the polygons plus their bounding-box limits.
url_zip <- "https://dl.dropboxusercontent.com/s/chyvmlrkkk4jcgb/school_distr.zip"
if (!file.exists("school_distr.zip")) {
  # download file as zip
  download.file(url_zip, "school_distr.zip")
}
unzip("school_distr.zip") # no exdir given, so files land in the default folder
schdstr_shp <- readShapePoly("school.shp") # read shape file
## Warning: readShapePoly is deprecated; use rgdal::readOGR or sf::st_read
# Rows 1 and 2 of the bounding-box slot give the x and y extents respectively.
xlim <- slot(schdstr_shp, "bbox")[1, ]
ylim <- slot(schdstr_shp, "bbox")[2, ]
Download and load the crime csv data:
# Fetch the Baltimore crime records (only when the CSV is not already on disk)
# and load them into `df`, keeping text columns as character vectors.
url_csv <- 'https://dl.dropboxusercontent.com/s/4hg5ffdds9n2nx3/baltimore_crime.csv'
if (!file.exists('baltimore_crime.csv')) {
  download.file(url_csv, 'baltimore_crime.csv')
}
# Spell out FALSE: `F` is an ordinary variable that can be reassigned.
df <- read.csv('baltimore_crime.csv', stringsAsFactors = FALSE)
Transform dates and time variables depending on what we need:
typeof(df$CrimeDate)
## [1] "character"
# Parse the "mm/dd/YYYY" strings into Date values once, then derive the
# calendar parts from the parsed dates. (The earlier substr()-based versions
# of year/month/day were overwritten by these lines and have been dropped.)
df$CrimeDate <- as.Date(df$CrimeDate, "%m/%d/%Y")
df$year <- format(df$CrimeDate, "%Y")
df$month <- format(df$CrimeDate, "%m")
df$day <- format(df$CrimeDate, "%d")
# `time` is the numeric value of the first two characters of CrimeTime.
# NOTE(review): assumes CrimeTime always starts with a two-digit hour - confirm
# against the raw data.
df$time <- as.numeric(substr(df$CrimeTime, 1, 2))
Transform coordinates data into numeric: In the original data, there is a column called “Location1”. This variable holds pairs of the form “(latitude, longitude)”. The code below extracts two numeric variables, longitude and latitude, from it.
# Drop the enclosing parentheses so each entry reads "latitude,longitude".
df$Location1 <- gsub(
  pattern = "[()]",
  replacement = "",
  x = df$Location1
)
# Extract the latitude text from one "latitude,longitude" string.
#
# x: a character string such as "39.29, -76.61" (parentheses already removed).
#    It is written for a single element; apply it element-wise to a column.
# Returns: the text before the first comma when it contains at least one
#   digit, otherwise NA_character_. The result is always length 1, so
#   element-wise callers get a plain vector instead of a ragged list when a
#   location is empty, NA or malformed.
getla <- function(x) {
  piece <- unlist(strsplit(x, ","))[1]
  # isTRUE() also covers the NULL / NA cases, where grepl() gives no TRUE.
  if (isTRUE(grepl("\\d+", piece))) piece else NA_character_
}
# Extract the longitude text from one "latitude,longitude" string.
#
# x: a character string such as "39.29, -76.61" (parentheses already removed).
#    It is written for a single element; apply it element-wise to a column.
# Returns: the text between the first and second comma when it contains a
#   minus sign followed by a digit (i.e. only negative longitudes are
#   accepted), otherwise NA_character_. Any whitespace after the comma is
#   kept as-is. The result is always length 1, so element-wise callers get a
#   plain vector instead of a ragged list when a location is empty, NA or
#   malformed.
getlo <- function(x) {
  piece <- unlist(strsplit(x, ","))[2]
  # isTRUE() also covers the NULL / NA cases, where grepl() gives no TRUE.
  if (isTRUE(grepl("\\-\\d+", piece))) piece else NA_character_
}
# Build numeric latitude/longitude columns, one value per row.
# vapply() guarantees a plain numeric vector (sapply() could silently return a
# list, and named every element with the full location string). Taking `[1]`
# of the helper result turns an empty match into NA, so rows without usable
# coordinates become NA_real_ instead of breaking the column.
df$latitude <- vapply(
  df$Location1,
  function(x) as.numeric(getla(x)[1]),
  numeric(1),
  USE.NAMES = FALSE
)
df$longitude <- vapply(
  df$Location1,
  function(x) as.numeric(getlo(x)[1]),
  numeric(1),
  USE.NAMES = FALSE
)
Summarize geographic and time patterns in assault-class crimes: Summarize the geographic and time patterns of crimes whose description contains the keyword “ASSAULT”. For the time patterns, split the day into 4 sections: morning (6:00 am to 12:00 pm), afternoon (12:00 pm to 6:00 pm), evening (6:00 pm to 12:00 am) and midnight (12:00 am to 6:00 am), and plot the geographic pattern of each section separately.
# Draw a 2 x 2 grid of maps, one per six-hour section of the day, showing
# where crimes whose description contains "ASSAULT" were recorded.
# Panels are filled row by row in loop order: 0-6 h, 6-12 h, 12-18 h, 18-24 h.
par(mfrow = c(2, 2))
des <- grep("ASSAULT", df$Description, value = TRUE)
# The assault mask does not depend on the time window, so compute it once.
is_assault <- df$Description %in% des
for (t in c(0, 6, 12, 18)) {
  # Keep only hours in [t, t + 6). Without the lower bound every panel would
  # also contain all earlier hours. which() drops rows whose hour is NA.
  in_window <- which(is_assault & df$time >= t & df$time < t + 6)
  plot(schdstr_shp, axes = TRUE)
  points(
    df$longitude[in_window],
    df$latitude[in_window],
    type = "p",
    col = rgb(0, 0, 1, 0.05), # nearly transparent blue so dense areas stand out
    pch = 19,
    cex = 0.3
  )
}