Analysis

Set Up

library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.5.1     ✔ tibble    3.2.1
✔ lubridate 1.9.3     ✔ tidyr     1.3.1
✔ purrr     1.0.2     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

Load the Cleaned data

# Read the cleaned child-protection dataset from disk; every query and plot
# below starts from `df_clean`.
df_clean <- readRDS("data/child_protection_clean.rds")

Data Analysis

Q1

How does the number of abused children change each year?

# Q1: total number of CPR cases per year. The "Sex of Maltreated Child" rows
# are summed over their items to obtain a single total for each year.
df_clean |>
  filter(
    system == "CPR",
    category == "Sex of Maltreated Child"
  ) |>
  group_by(year) |>
  summarise(n = sum(cases), .groups = "drop")
# A tibble: 19 × 2
    year     n
   <dbl> <dbl>
 1  2005   763
 2  2006   806
 3  2007   944
 4  2008   882
 5  2009   993
 6  2010  1001
 7  2011   877
 8  2012   894
 9  2013   963
10  2014   856
11  2015   874
12  2016   892
13  2017   947
14  2018  1064
15  2019  1006
16  2020   940
17  2021  1367
18  2022  1439
19  2023  1457

Q2

Which types of child abuse are the most common each year?

# Q2: list the CPR "Types of Harm/Maltreatment" rows year by year, with the
# largest case count first within each year.
df_clean |>
  filter(
    system == "CPR",
    category == "Types of Harm/Maltreatment"
  ) |>
  arrange(year, desc(cases))
# A tibble: 95 × 6
   system subject                                 category     item   year cases
   <chr>  <chr>                                   <chr>        <chr> <dbl> <dbl>
 1 CPR    Newly Registered Child Protection Cases Types of Ha… Phys…  2005   413
 2 CPR    Newly Registered Child Protection Cases Types of Ha… Sexu…  2005   234
 3 CPR    Newly Registered Child Protection Cases Types of Ha… Mult…  2005    52
 4 CPR    Newly Registered Child Protection Cases Types of Ha… Negl…  2005    41
 5 CPR    Newly Registered Child Protection Cases Types of Ha… Psyc…  2005    23
 6 CPR    Newly Registered Child Protection Cases Types of Ha… Phys…  2006   438
 7 CPR    Newly Registered Child Protection Cases Types of Ha… Sexu…  2006   233
 8 CPR    Newly Registered Child Protection Cases Types of Ha… Negl…  2006    77
 9 CPR    Newly Registered Child Protection Cases Types of Ha… Mult…  2006    46
10 CPR    Newly Registered Child Protection Cases Types of Ha… Psyc…  2006    12
# ℹ 85 more rows

Q3

What is the overall percentage of abused children by gender across all the survey years?

# Q3: share of CPR cases by sex of the child, pooled over every year in the
# data (cases are summed per sex first, then divided by the grand total).
df_clean |>
  filter(
    system == "CPR",
    category == "Sex of Maltreated Child"
  ) |>
  group_by(item) |>
  summarise(n = sum(cases), .groups = "drop") |>
  mutate(
    total_n = sum(n),
    percentage = n / total_n * 100
  )
# A tibble: 2 × 4
  item       n total_n percentage
  <chr>  <dbl>   <dbl>      <dbl>
1 Female 11203   18965       59.1
2 Male    7762   18965       40.9

Q4

What percentage of cases does each type of abuser account for in the most recent year?

# Q4: share of 2023 CPR cases by the perpetrator's relationship with the
# child, sorted from the largest share to the smallest.
df_clean |>
  filter(
    system == "CPR",
    category == "Perpetrator's Relationship with Maltreated Child",
    year == 2023
  ) |>
  group_by(item) |>
  summarise(n = sum(cases), .groups = "drop") |>
  mutate(
    total_n = sum(n),
    percentage = n / total_n * 100
  ) |>
  arrange(desc(percentage))
# A tibble: 17 × 4
   item                                       n total_n percentage
   <chr>                                  <dbl>   <dbl>      <dbl>
 1 Parent                                   858    1539     55.8  
 2 Schoolmate / friend / peer               190    1539     12.3  
 3 Unrelated person (including strangers)   142    1539      9.23 
 4 Tutor / Coach                             63    1539      4.09 
 5 Unidentified person                       62    1539      4.03 
 6 Step-parent                               50    1539      3.25 
 7 Caregiver                                 36    1539      2.34 
 8 School teacher / personnel                29    1539      1.88 
 9 Grandparent                               28    1539      1.82 
10 Relative                                  25    1539      1.62 
11 Sibling                                   25    1539      1.62 
12 Family friend / Parent of peer            11    1539      0.715
13 Co-tenant / Neighbour                      9    1539      0.585
14 Inmate of residential service              5    1539      0.325
15 Religious personnel                        4    1539      0.260
16 Staff of boarding section of school        2    1539      0.130
17 Other                                      0    1539      0    

Data Visualization

library(ggrepel)
Warning: package 'ggrepel' was built under R version 4.4.2

Line Plot: The Number of Abused Children from 2005 to 2023

# Line plot of the yearly CPR case totals (same aggregation as Q1), with each
# point labelled by its value.
plot1 <- df_clean |>
  filter(system == "CPR", category == "Sex of Maltreated Child") |>
  group_by(year) |>
  summarise(total_cases = sum(cases)) |>
  ggplot(aes(x = year, y = total_cases)) +
  geom_line(color = "skyblue") +
  geom_point(color = "skyblue", size = 2) +
  geom_label_repel(aes(label = total_cases), size = 3, color = "dodgerblue") +
  # One axis break per year.
  scale_x_continuous(breaks = seq(2005, 2023, 1)) +
  labs(
    title = "The Number of Abused Children from 2005 to 2023",
    x = "Year",
    y = "Number of Cases"
  ) +
  theme_light() +
  theme(legend.position = "none")
print(plot1)

# ggsave() does not create a missing output directory by default, so make
# sure "out/" exists before the first figure is written. This is a no-op when
# the directory is already there.
dir.create("out", showWarnings = FALSE, recursive = TRUE)
ggsave("out/number.png", plot = plot1, width = 10, height = 6, dpi = 300)

Line Plot: The Number of Abuse Types from 2005 to 2023

# Line plot of CPR case counts per year, one coloured line per type of
# harm/maltreatment.
# The data are passed to ggplot() ungrouped: the lines are split by the
# `color = item` mapping, so a dplyr group_by() beforehand is not needed.
plot2 <- df_clean |>
  filter(system == "CPR", category == "Types of Harm/Maltreatment") |>
  ggplot(aes(x = year, y = cases, color = item)) +
  # Both layers inherit the colour mapping from ggplot() above.
  geom_line() +
  geom_point() +
  scale_x_continuous(breaks = seq(2005, 2023, 1)) +
  scale_color_brewer(palette = "Set2") +
  labs(
    title = "The Number of Abuse Types from 2005 to 2023",
    x = "Year",
    y = "Number of Cases",
    color = "Types of Abuse"
  ) +
  theme_bw() +
  theme(legend.position = "bottom")
print(plot2)

ggsave("out/types.png", plot = plot2, width = 9, height = 6, dpi = 300)

Plot with Facets

# Faceted version of the abuse-type line plot: one panel per type of
# harm/maltreatment.
# The data are passed to ggplot() ungrouped: the panels come from facet_wrap()
# and the line colour from the `color = item` mapping, so a dplyr group_by()
# beforehand is not needed.
plot3 <- df_clean |>
  filter(system == "CPR", category == "Types of Harm/Maltreatment") |>
  ggplot(aes(x = year, y = cases, color = item)) +
  # The layer inherits the colour mapping from ggplot() above.
  geom_line() +
  # Axis breaks every third year.
  scale_x_continuous(breaks = seq(2005, 2023, 3)) +
  scale_color_brewer(palette = "Set2") +
  labs(
    title = "The Number of Abuse Types from 2005 to 2023",
    x = "Year",
    y = "Number of Cases",
    color = "Types of Abuse"
  ) +
  facet_wrap(~item) +
  theme_bw() +
  # The facet strips already name each type, so the legend is hidden.
  theme(legend.position = "none")
print(plot3)

ggsave("out/types_facets.png", plot = plot3, width = 10, height = 6, dpi = 300)

Pie Chart: Average Percentage of Abused Children by Gender from 2005 to 2023

# Pie chart of the share of CPR cases by sex of the child, pooled over all
# years (same aggregation as Q3). A single stacked bar is drawn and then bent
# into a pie with polar coordinates.
plot4 <- df_clean |>
  filter(
    system == "CPR",
    category == "Sex of Maltreated Child"
  ) |>
  group_by(item) |>
  summarise(n = sum(cases), .groups = "drop") |>
  mutate(
    total_n = sum(n),
    percentage = n / total_n * 100
  ) |>
  ggplot(aes(x = "", y = percentage, fill = item)) +
  geom_col() +
  coord_polar(theta = "y") +
  scale_fill_manual(values = c("Male" = "lightblue", "Female" = "pink")) +
  # Percentage label centred inside each slice.
  geom_text(
    aes(label = paste0(round(percentage, 1), "%")),
    position = position_stack(vjust = 0.5),
    color = "white",
    size = 6
  ) +
  theme_void() +
  labs(
    title = "Average Percentage of Abused Children by Gender from 2005 to 2023",
    fill = "Gender"
  )
print(plot4)

ggsave("out/gender.png", plot = plot4, width = 8, height = 6, dpi = 300)

Line Plot: The Number of Child Abuse Cases by Gender from 2005 to 2023

# Line plot of CPR case counts per year, one coloured line per sex of the
# child.
# The data are passed to ggplot() ungrouped: the lines are split by the
# `color = item` mapping, so a dplyr group_by() beforehand is not needed.
plot6 <- df_clean |>
  filter(system == "CPR", category == "Sex of Maltreated Child") |>
  ggplot(aes(x = year, y = cases, color = item)) +
  # Both layers inherit the colour mapping from ggplot() above.
  geom_line() +
  geom_point(size = 3) +
  scale_x_continuous(breaks = seq(2005, 2023, 1)) +
  scale_color_brewer(palette = "Set1") +
  # The legend is hidden below, so the series are identified by these labels.
  # NOTE(review): every point is labelled with its series name; consider
  # labelling only the last year of each line to reduce clutter.
  geom_label_repel(aes(label = item), size = 3) +
  labs(
    title = "The Number of Child Abuse Cases by Gender from 2005 to 2023",
    x = "Year",
    y = "Number of Cases",
    color = "Gender"
  ) +
  theme_bw() +
  theme(legend.position = "none")
print(plot6)

ggsave("out/gender_year.png", plot = plot6, width = 10, height = 6, dpi = 300)

Bar Chart: The Number of Abusers by Type in the Most Recent Year (2023)

Note: To present a chart type different from those above, this chart shows the number of cases rather than the percentages.

# Horizontal bar chart of 2023 CPR case counts by the perpetrator's
# relationship with the child, with bars ordered by case count.
# The data are passed to ggplot() ungrouped: a dplyr group_by() beforehand is
# not needed for the bars or their ordering.
plot5 <- df_clean |>
  filter(
    system == "CPR",
    category == "Perpetrator's Relationship with Maltreated Child",
    year == 2023
  ) |>
  # fct_reorder() orders the categories by their case count.
  ggplot(aes(x = fct_reorder(item, cases), y = cases, fill = item)) +
  geom_col(width = 0.8) +
  # Value label placed just past the end of each bar.
  geom_text(aes(label = round(cases)), hjust = -0.1, size = 3) +
  # Flip the axes so the category names read horizontally.
  coord_flip() +
  labs(
    title = "The Number of Abusers by Type in 2023",
    x = "Types of Abusers",
    y = "Number of cases"
  ) +
  theme_minimal() +
  # The axis already names each category, so the fill legend is hidden.
  theme(legend.position = "none")
print(plot5)

ggsave("out/abuser.png", plot = plot5, width = 10, height = 6, dpi = 300)