library(tidyverse); library(lubridate); library(here); library(skimr); library(yaml); library(gplots)
# Load the public I-213 extract. The file is pipe-delimited; any column not
# listed explicitly in `col_types` is read as character.
inputfile <- here::here("write/input/uw-chr-i213-public.csv.gz")

i213 <- read_delim(
  inputfile,
  delim = "|",
  col_types = cols(
    .default = col_character(),
    source = col_factor(),
    sex = col_factor(),
    cmplxn = col_character(),
    country_of_citizenship = col_factor(),
    year = col_double(),
    month = col_double(),
    day = col_double(),
    hour = col_double(),
    minute = col_double(),
    age = col_double(),
    accompanied_juvenile_flag = col_double(),
    unaccompanied_juvenile_flag = col_double(),
    custody_redetermination_flag = col_double()
  )
)

# NOTE(review): unlike the here::here() path above, this path is resolved
# against the current working directory -- confirm it still resolves when the
# document is run from a different location.
priority_counties <- read_yaml("../../shared/hand/priority_counties.yaml")

# Drop records with no `source`, then coerce the `mentions_*` keyword
# indicator columns (read in as character) to logical.
# across() replaces the superseded mutate_at(); the result is the same.
i213 <- i213 %>%
  filter(!is.na(source)) %>%
  mutate(across(starts_with("mentions_"), as.logical))
Fields starting with “mentions_” are the result of a simple str_detect()
function applied to keywords in I-213 “Narrative” fields. Search terms are as follows:
mentions_airport
: '[Aa]irport'
mentions_anonymous_tip
: '[Aa]nonymous tip|concerned citizen'
mentions_border_patrol
: '[Bb]order [Pp]atrol'
mentions_bus
: '([Bb]us |[Bb]us\.|[Bb]us,)'
mentions_corrections
: '[Cc]orrections'
mentions_courthouse
: '([Cc]ourthouse|[Cc]ourt [Hh]ouse)'
mentions_database
: '[Dd]atabase'
mentions_detainer
: '[Dd]etainer|[I1]-247|[I1]-2[0Oo][0Oo]'
mentions_family_unit
: '[Ff]amily [Uu]nit'
mentions_greyhound
: '([Gg]reyhound|[Gg]rey [Hh]ound)'
mentions_hsi
: 'HSI|[Hh]omeland [Ss]ecurity [Ii]nvestigation'
mentions_jail
: '[Jj]ail'
mentions_juvenile
: '[Jj]uvenile'
mentions_police
: '[Pp]olice'
mentions_prison
: '[Pp]rison'
mentions_probation_office
: '[Pp]robation [Oo]ffice'
mentions_secure_comm
: '[Ss]ecure [Cc]ommunities|Immigration Alien Response|(IAR)'
mentions_sheriff
: '[Ss]heriff'
mentions_state_patrol
: '[Ss]tate [Pp]atrol'
mentions_surveillance
: 'surveillance'
mentions_task_force
: '[Tt]ask [Ff]orce'
mentions_traffic
: 'traffic'
Below, we compare these with method_location_apprehension
values to help gather context clues for decoding “Method of Location/Apprehension” codes.
# Summary statistics for the keyword indicator columns only.
i213 %>%
  skimr::skim(starts_with("mentions_"))
Name | i213 |
Number of rows | 4052 |
Number of columns | 62 |
_______________________ | |
Column type frequency: | |
logical | 23 |
________________________ | |
Group variables | None |
Variable type: logical
skim_variable | n_missing | complete_rate | mean | count |
---|---|---|---|---|
mentions_airport | 0 | 1 | 0.10 | FAL: 3662, TRU: 390 |
mentions_anonymous_tip | 0 | 1 | 0.01 | FAL: 4021, TRU: 31 |
mentions_border_patrol | 0 | 1 | 0.36 | FAL: 2591, TRU: 1461 |
mentions_bus | 0 | 1 | 0.03 | FAL: 3922, TRU: 130 |
mentions_corrections | 0 | 1 | 0.13 | FAL: 3540, TRU: 512 |
mentions_courthouse | 0 | 1 | 0.03 | FAL: 3948, TRU: 104 |
mentions_database | 0 | 1 | 0.30 | FAL: 2852, TRU: 1200 |
mentions_detainer | 0 | 1 | 0.32 | FAL: 2757, TRU: 1295 |
mentions_family_unit | 0 | 1 | 0.01 | FAL: 4008, TRU: 44 |
mentions_greyhound | 0 | 1 | 0.01 | FAL: 4014, TRU: 38 |
mentions_hsi | 0 | 1 | 0.04 | FAL: 3880, TRU: 172 |
mentions_jail | 0 | 1 | 0.47 | FAL: 2156, TRU: 1896 |
mentions_juvenile | 0 | 1 | 0.04 | FAL: 3910, TRU: 142 |
mentions_license_plate | 0 | 1 | 0.04 | FAL: 3872, TRU: 180 |
mentions_police | 0 | 1 | 0.18 | FAL: 3304, TRU: 748 |
mentions_prison | 0 | 1 | 0.19 | FAL: 3268, TRU: 784 |
mentions_probation_office | 0 | 1 | 0.00 | FAL: 4033, TRU: 19 |
mentions_secure_comm | 0 | 1 | 0.05 | FAL: 3868, TRU: 184 |
mentions_sheriff | 0 | 1 | 0.11 | FAL: 3614, TRU: 438 |
mentions_state_patrol | 0 | 1 | 0.02 | FAL: 3978, TRU: 74 |
mentions_surveillance | 0 | 1 | 0.11 | FAL: 3606, TRU: 446 |
mentions_task_force | 0 | 1 | 0.01 | FAL: 4002, TRU: 50 |
mentions_traffic | 0 | 1 | 0.05 | FAL: 3866, TRU: 186 |
# Collapse the raw method_location_apprehension strings into a standard code.
# case_when() tries the rules top to bottom and keeps the first match, so the
# multi-letter codes must stay ahead of the broad "L" and "O|0" fallbacks.
i213 <- mutate(
  i213,
  method_loc_app_clean = case_when(
    # Missing input stays missing; str_detect() returns NA for it, so none of
    # the pattern rules below would fire anyway.
    is.na(method_location_apprehension) ~ NA_character_,
    str_detect(method_location_apprehension, "PB") ~ "PB",
    str_detect(method_location_apprehension, "CFD") ~ "CFD",
    str_detect(method_location_apprehension, "CST") ~ "CST",
    str_detect(method_location_apprehension, "CLC") ~ "CLC",
    str_detect(method_location_apprehension, "NCA") ~ "NCA",
    str_detect(method_location_apprehension, "LEA") ~ "LEA",
    str_detect(method_location_apprehension, "OA") ~ "OA",
    str_detect(method_location_apprehension, "OTF") ~ "OTF",
    str_detect(method_location_apprehension, "TCB") ~ "TCB",
    str_detect(method_location_apprehension, "ISP") ~ "ISP",
    str_detect(method_location_apprehension, "L") ~ "L",
    # The digit 0 is accepted alongside the letter O (presumably a misread
    # character in the source text -- confirm).
    str_detect(method_location_apprehension, "O|0") ~ "O",
    # Any non-missing value that matched none of the rules above.
    TRUE ~ "OTHER"
  )
)
First we examine correlations of keyword mentions in I-213 narratives. Some strong positive correlations are trivial: e.g. “Greyhound” and “bus”; “prison” and “corrections”. Others may be interesting: e.g. the positive correlation between “database” and “jail” but the negative correlation between “database” and “prison”.
# Pairwise correlations between the keyword indicator columns.
data <- select(i213, starts_with('mentions'))
corrplot::corrplot(corr = cor(data))
Comparing simple standardization of method_location_apprehension
with mentions_*
columns. Note similarity of “O”, “LEA”, “OTF” categories per clustering. Keywords largely seem appropriate in relation to proposed “Method of Location/Apprehension” values as discussed in https://uwchr.github.io/i-213-analysis/.
# Count keyword mentions within each standardized apprehension code.
# summarise(across(...)) replaces the superseded summarize_all(); the grouping
# column is left out of the summed columns in both forms.
mentions_method <- i213 %>%
  select(method_loc_app_clean, starts_with("mentions")) %>%
  group_by(method_loc_app_clean) %>%
  summarise(across(starts_with("mentions"), sum))

# Keywords in rows, apprehension codes in columns. t() applied to a data frame
# already returns a matrix, so no further as.matrix() call is needed.
m <- mentions_method %>%
  select(starts_with("mentions")) %>%
  t()
colnames(m) <- as.character(mentions_method$method_loc_app_clean)

# Row scaling: each keyword's counts are standardized across the codes.
heatmap.2(
  m,
  scale = "row",
  density.info = "none", # turns off density plot inside color legend
  trace = "none", # turns off trace lines inside the heat map
  main = "method_location_apprehension\nin search string mentions_*",
  margins = c(12, 12)
)
Comparison after grouping “LEA/OTF/O”. Note “CLC” similarity to NA values for method_location_apprehension
, which makes sense given “CLC” is the most common value after simple standardization.
# Re-derive the standardized code, this time folding the "LEA", "OTF" and "O"
# matches into a single "LEA/OTF/O" category. Rules are tried top to bottom
# and the first match wins, so the rule order is significant.
i213 <- mutate(
  i213,
  method_loc_app_clean = case_when(
    # Missing input stays missing; str_detect() returns NA for it, so none of
    # the pattern rules below would fire anyway.
    is.na(method_location_apprehension) ~ NA_character_,
    str_detect(method_location_apprehension, "PB") ~ "PB",
    str_detect(method_location_apprehension, "CFD") ~ "CFD",
    str_detect(method_location_apprehension, "CST") ~ "CST",
    str_detect(method_location_apprehension, "CLC") ~ "CLC",
    str_detect(method_location_apprehension, "NCA") ~ "NCA",
    str_detect(method_location_apprehension, "LEA") ~ "LEA/OTF/O",
    str_detect(method_location_apprehension, "OA") ~ "OA",
    str_detect(method_location_apprehension, "OTF") ~ "LEA/OTF/O",
    str_detect(method_location_apprehension, "TCB") ~ "TCB",
    str_detect(method_location_apprehension, "ISP") ~ "ISP",
    str_detect(method_location_apprehension, "L") ~ "L",
    # The digit 0 is accepted alongside the letter O (presumably a misread
    # character in the source text -- confirm).
    str_detect(method_location_apprehension, "O|0") ~ "LEA/OTF/O",
    # Any non-missing value that matched none of the rules above.
    TRUE ~ "OTHER"
  )
)
# Recount keyword mentions per apprehension code using the current
# method_loc_app_clean values.
# summarise(across(...)) replaces the superseded summarize_all(); the grouping
# column is left out of the summed columns in both forms.
mentions_method <- i213 %>%
  select(method_loc_app_clean, starts_with("mentions")) %>%
  group_by(method_loc_app_clean) %>%
  summarise(across(starts_with("mentions"), sum))

# Keywords in rows, apprehension codes in columns. t() applied to a data frame
# already returns a matrix, so no further as.matrix() call is needed.
m <- mentions_method %>%
  select(starts_with("mentions")) %>%
  t()
colnames(m) <- as.character(mentions_method$method_loc_app_clean)

# Row scaling: each keyword's counts are standardized across the codes.
heatmap.2(
  m,
  scale = "row",
  density.info = "none", # turns off density plot inside color legend
  trace = "none", # turns off trace lines inside the heat map
  main = "method_location_apprehension\nin search string mentions_*",
  margins = c(12, 12)
)

# Column scaling: each code's counts are standardized across the keywords.
heatmap.2(
  m,
  scale = "column",
  density.info = "none", # turns off density plot inside color legend
  trace = "none", # turns off trace lines inside the heat map
  main = "search string mentions_* in\nmethod_location_apprehension",
  margins = c(12, 12)
)