library(tidyverse); library(lubridate); library(here); library(skimr); library(yaml); library(gplots)
# Location of the public I-213 extract (.csv.gz), resolved through here::here().
inputfile <- here::here("write/input/uw-chr-i213-public.csv.gz")

# Read the pipe-delimited extract. Every column is text unless typed below.
i213 <- inputfile %>%
  read_delim(
    delim = "|",
    col_types = cols(
      # Fallback type for all columns not named explicitly.
      .default = col_character(),
      cmplxn = col_character(),
      # Categorical fields.
      source = col_factor(),
      sex = col_factor(),
      country_of_citizenship = col_factor(),
      # Numeric date/time components and age.
      year = col_double(),
      month = col_double(),
      day = col_double(),
      hour = col_double(),
      minute = col_double(),
      age = col_double(),
      # Flag columns, read as numbers.
      accompanied_juvenile_flag = col_double(),
      unaccompanied_juvenile_flag = col_double(),
      custody_redetermination_flag = col_double()
    )
  )

# Parse the priority-county YAML file. Unlike `inputfile`, this path is relative
# to the current working directory rather than resolved with here::here().
# NOTE(review): not referenced again in the lines visible here -- confirm it is
# still needed.
priority_counties <- yaml::read_yaml(
  file = "../../shared/hand/priority_counties.yaml"
)

# Discard records with no `source`, then convert every mentions_* column
# (read in as text) to logical.
i213 <- i213 %>%
  filter(!is.na(source)) %>%
  mutate_at(vars(starts_with("mentions_")), as.logical)

Fields starting with “mentions_” are the result of a simple str_detect() keyword search of the I-213 “Narrative” fields. Search terms are as follows:

Below, we compare these keyword flags with method_location_apprehension values to gather context clues for decoding the “Method of Location/Apprehension” codes.

# Summarise only the keyword indicator columns.
i213 %>%
  skimr::skim(starts_with("mentions_"))
Data summary
Name i213
Number of rows 4052
Number of columns 62
_______________________
Column type frequency:
logical 23
________________________
Group variables None

Variable type: logical

skim_variable n_missing complete_rate mean count
mentions_airport 0 1 0.10 FAL: 3662, TRU: 390
mentions_anonymous_tip 0 1 0.01 FAL: 4021, TRU: 31
mentions_border_patrol 0 1 0.36 FAL: 2591, TRU: 1461
mentions_bus 0 1 0.03 FAL: 3922, TRU: 130
mentions_corrections 0 1 0.13 FAL: 3540, TRU: 512
mentions_courthouse 0 1 0.03 FAL: 3948, TRU: 104
mentions_database 0 1 0.30 FAL: 2852, TRU: 1200
mentions_detainer 0 1 0.32 FAL: 2757, TRU: 1295
mentions_family_unit 0 1 0.01 FAL: 4008, TRU: 44
mentions_greyhound 0 1 0.01 FAL: 4014, TRU: 38
mentions_hsi 0 1 0.04 FAL: 3880, TRU: 172
mentions_jail 0 1 0.47 FAL: 2156, TRU: 1896
mentions_juvenile 0 1 0.04 FAL: 3910, TRU: 142
mentions_license_plate 0 1 0.04 FAL: 3872, TRU: 180
mentions_police 0 1 0.18 FAL: 3304, TRU: 748
mentions_prison 0 1 0.19 FAL: 3268, TRU: 784
mentions_probation_office 0 1 0.00 FAL: 4033, TRU: 19
mentions_secure_comm 0 1 0.05 FAL: 3868, TRU: 184
mentions_sheriff 0 1 0.11 FAL: 3614, TRU: 438
mentions_state_patrol 0 1 0.02 FAL: 3978, TRU: 74
mentions_surveillance 0 1 0.11 FAL: 3606, TRU: 446
mentions_task_force 0 1 0.01 FAL: 4002, TRU: 50
mentions_traffic 0 1 0.05 FAL: 3866, TRU: 186
# Collapse the raw method_location_apprehension text into a short list of codes.
# case_when() takes the first matching branch, so the multi-letter codes must
# stay ahead of the broad "L" and "O|0" patterns, which match any value
# containing those characters ("O|0" presumably also catches a zero typed for
# the letter O -- confirm).
i213 <- mutate(
  i213,
  method_loc_app_clean = case_when(
    # Missing input stays missing; no pattern branch can match an NA anyway.
    is.na(method_location_apprehension) ~ NA_character_,
    str_detect(method_location_apprehension, "PB") ~ "PB",
    str_detect(method_location_apprehension, "CFD") ~ "CFD",
    str_detect(method_location_apprehension, "CST") ~ "CST",
    str_detect(method_location_apprehension, "CLC") ~ "CLC",
    str_detect(method_location_apprehension, "NCA") ~ "NCA",
    str_detect(method_location_apprehension, "LEA") ~ "LEA",
    str_detect(method_location_apprehension, "OA") ~ "OA",
    str_detect(method_location_apprehension, "OTF") ~ "OTF",
    str_detect(method_location_apprehension, "TCB") ~ "TCB",
    str_detect(method_location_apprehension, "ISP") ~ "ISP",
    str_detect(method_location_apprehension, "L") ~ "L",
    str_detect(method_location_apprehension, "O|0") ~ "O",
    # Non-missing values that match none of the patterns above.
    TRUE ~ "OTHER"
  )
)

First we examine correlations among keyword mentions in I-213 narratives. Some strong positive correlations are trivial, e.g. “Greyhound” and “bus”, or “prison” and “corrections”. Others may be interesting, e.g. the positive correlation between “database” and “jail” but negative correlation between “database” and “prison”.

# Keep only the keyword indicator columns (logical after the conversion above).
data <- select(i213, starts_with("mentions"))

# Plot the pairwise correlation matrix of the keyword indicators.
corrplot::corrplot(corr = cor(data))

Next we compare the simple standardization of method_location_apprehension with the mentions_* columns. Note the similarity of the “O”, “LEA”, and “OTF” categories in the clustering. The keywords largely seem consistent with the proposed “Method of Location/Apprehension” values discussed in https://uwchr.github.io/i-213-analysis/.

# For each cleaned apprehension code, count the records flagged for every
# keyword (summing a logical column counts its TRUE values).
mentions_method <- i213 %>%
  select(method_loc_app_clean, starts_with("mentions")) %>%
  group_by(method_loc_app_clean) %>%
  summarise_all(sum)

# Reshape into a keyword-by-code matrix: one row per mentions_* column, one
# column per cleaned code (the NA group keeps an NA column name).
m <- t(as.matrix(select(mentions_method, starts_with("mentions"))))
colnames(m) <- as.character(mentions_method[["method_loc_app_clean"]])

# Clustered heat map of keyword counts, scaled within each row (keyword).
heatmap.2(
  x = m,
  scale = "row",
  density.info = "none", # no density plot inside the colour legend
  trace = "none",        # no trace lines inside the heat map
  main = "method_location_apprehension\nin search string mentions_*",
  margins = c(12, 12)
)

Comparison after grouping “LEA/OTF/O”. Note the similarity of “CLC” to NA values of method_location_apprehension, which makes sense given that “CLC” is the most common value after simple standardization.

# Rebuild method_loc_app_clean from the raw field, this time folding the LEA,
# OTF and O branches into one "LEA/OTF/O" label. The branches stay in their
# original positions because case_when() takes the first match: merging them
# into a single earlier pattern would change results for values that also
# match "OA".
i213 <- mutate(
  i213,
  method_loc_app_clean = case_when(
    # Missing input stays missing; no pattern branch can match an NA anyway.
    is.na(method_location_apprehension) ~ NA_character_,
    str_detect(method_location_apprehension, "PB") ~ "PB",
    str_detect(method_location_apprehension, "CFD") ~ "CFD",
    str_detect(method_location_apprehension, "CST") ~ "CST",
    str_detect(method_location_apprehension, "CLC") ~ "CLC",
    str_detect(method_location_apprehension, "NCA") ~ "NCA",
    str_detect(method_location_apprehension, "LEA") ~ "LEA/OTF/O",
    str_detect(method_location_apprehension, "OA") ~ "OA",
    str_detect(method_location_apprehension, "OTF") ~ "LEA/OTF/O",
    str_detect(method_location_apprehension, "TCB") ~ "TCB",
    str_detect(method_location_apprehension, "ISP") ~ "ISP",
    str_detect(method_location_apprehension, "L") ~ "L",
    str_detect(method_location_apprehension, "O|0") ~ "LEA/OTF/O",
    # Non-missing values that match none of the patterns above.
    TRUE ~ "OTHER"
  )
)

# Recompute the per-code keyword counts using the current (grouped) values of
# method_loc_app_clean; summing a logical column counts its TRUE values.
mentions_method <- i213 %>%
  select(method_loc_app_clean, starts_with("mentions")) %>%
  group_by(method_loc_app_clean) %>%
  summarise_all(sum)

# Keyword-by-code matrix: one row per mentions_* column, one column per code.
m <- t(as.matrix(select(mentions_method, starts_with("mentions"))))
colnames(m) <- as.character(mentions_method[["method_loc_app_clean"]])

# Clustered heat map of the current matrix `m`, scaled within each row (keyword).
heatmap.2(
  x = m,
  scale = "row",
  density.info = "none", # no density plot inside the colour legend
  trace = "none",        # no trace lines inside the heat map
  main = "method_location_apprehension\nin search string mentions_*",
  margins = c(12, 12)
)

# Same matrix `m`, now scaled within each column (apprehension code) to show
# which keywords stand out for a given code.
heatmap.2(
  x = m,
  scale = "column",
  density.info = "none", # no density plot inside the colour legend
  trace = "none",        # no trace lines inside the heat map
  main = "search string mentions_* in\nmethod_location_apprehension",
  margins = c(12, 12)
)