.Rhistory

pivot_longer(cols = -zip, names_to = "date", values_to = "value")
# Spread the long table back out: one column per value of `date`, filled from
# `value`.
wide_transposed <- pivot_wider(
  long_transposed,
  names_from = date,
  values_from = value
)
wide_transposed
# Reshape to long form: one row per zip/month pair, with the former column
# names in `date` and the cell contents in `value`.
# The history followed this with column_to_rownames(long_transposed, "zip").
# In long form each zip repeats once per month column, and row names must be
# unique, so that call cannot succeed; zip stays a regular column here.
long_transposed <- foreclosures |>
  pivot_longer(cols = -zip, names_to = "date", values_to = "value")
# Round-trip the table: long form first, then wide again, then move zip out of
# the columns and into the row names.
long_transposed <- pivot_longer(
  foreclosures,
  cols = -zip,
  names_to = "date",
  values_to = "value"
)
wide_transposed <- long_transposed |>
  pivot_wider(names_from = date, values_from = value) |>
  column_to_rownames(var = "zip")
wide_transposed
## TEST - DO NOT USE
# Scratch re-run of the long -> wide round trip with zip as row names.
long_transposed <- pivot_longer(
  foreclosures,
  cols = -zip,
  names_to = "date",
  values_to = "value"
)
wide_transposed <- column_to_rownames(
  pivot_wider(long_transposed, names_from = date, values_from = value),
  var = "zip"
)
wide_transposed
knitr::opts_chunk$set(echo = TRUE)
#install.packages("zoo")
library(tidyverse)
library(dplyr)
library(janitor)
library(zoo)
library(ggplot2)
library(tidycensus)
library(lubridate)
# Load the notices-of-intent-to-foreclose table and tidy its column names.
# After clean_names(), the column called february_2020 is renamed to
# february_2022.
foreclosures <- rename(
  clean_names(read_csv("Maryland_Notices_of_Intent_to_Foreclose.csv")),
  february_2022 = february_2020
)
foreclosures
# Total notices per zip code across every column whose name contains "_"
# (the month columns), largest first. We wanted a quick map of this rather
# than just a table of numbers, so the totals are also written to CSV.
foreclosures_sum <- arrange(
  summarise(
    group_by(foreclosures, zip),
    foreclosures_sum = sum(across(contains("_")))
  ),
  desc(foreclosures_sum)
)
foreclosures_sum
write_csv(foreclosures_sum, "foreclosure_sum.csv")
# Print the ranking once more, sorted from most to fewest notices.
arrange(foreclosures_sum, desc(foreclosures_sum))
# Transpose the data set so each month is a row and each zip code is a column.
# We had a lot of questions for ChatGPT
# https://chat.openai.com/share/a3d690f6-baec-4fc7-beb2-4844daf93de6 about
# getting R to treat the first column (zip) as the id column.
# pivot_longer()/pivot_wider() replace the superseded gather()/spread();
# names_sort = TRUE and arrange() keep the sorted column and row order that
# spread() produced.
transposed_redo <- foreclosures |>
  pivot_longer(cols = -1, names_to = "Date", values_to = "Value") |>
  pivot_wider(names_from = zip, values_from = Value, names_sort = TRUE) |>
  arrange(Date) |>
  mutate(
    # zoo year-month value built from the "month_year" column names.
    month_year = as.yearmon(paste(Date, "01", sep = "_"), format = "%B_%Y_%d")
  )
transposed_redo
# Second attempt, using lubridate instead of zoo. The column names only carry
# a month and a year, so they are parsed with my() rather than mdy(), which
# expects a day component. format() then turns the result back into a
# "Month_Year" label, so `date` ends up as a character column, not a Date.
transposed_redo_2 <- foreclosures |>
  pivot_longer(cols = -1, names_to = "date", values_to = "value") |>
  pivot_wider(names_from = zip, values_from = value, names_sort = TRUE) |>
  arrange(date) |>
  mutate(date = format(my(date), "%B_%Y"))
transposed_redo_2
# Narrow the transposed table down to zip code 20772 (per the original note,
# monthly values from July 2021 to October 2023): keep the month label and
# that zip's column, renamed "foreclosures".
# `date` is a "Month_Year" character label here, so it is parsed with my() for
# sorting only; that puts the rows in chronological order instead of
# alphabetical order by month name.
top_foreclosure_zip <- transposed_redo_2 |>
  select(date, foreclosures = `20772`) |>
  arrange(my(date))
top_foreclosure_zip
# Time to chart it! We should also look at the percent change of other zip
# codes.
# The earlier version failed with "Discrete value supplied to continuous
# scale" because `date` is a character label and the x axis was declared
# continuous. Parse the "Month_Year" label into a Date and use a date scale.
top_foreclosure_zip |>
  mutate(date = my(date)) |>
  ggplot() +
  geom_line(aes(x = date, y = foreclosures)) +
  scale_x_date(date_breaks = "1 month", date_labels = "%b %Y") +
  labs(
    title = "Foreclosure notices rose drastically in this PG County zip code",
    x = "month",
    y = "foreclosures notices issued",
    caption = "Source: Office of Financial Regulation"
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
# Transpose the data set so each month is a row and each zip code is a column.
# We had a lot of questions for ChatGPT
# https://chat.openai.com/share/a3d690f6-baec-4fc7-beb2-4844daf93de6 about
# getting R to treat the first column (zip) as the id column.
# pivot_longer()/pivot_wider() replace the superseded gather()/spread();
# names_sort = TRUE and arrange() keep the sorted column and row order that
# spread() produced.
transposed_redo <- foreclosures |>
  pivot_longer(cols = -1, names_to = "Date", values_to = "Value") |>
  pivot_wider(names_from = zip, values_from = Value, names_sort = TRUE) |>
  arrange(Date) |>
  mutate(
    # zoo year-month value built from the "month_year" column names.
    month_year = as.yearmon(paste(Date, "01", sep = "_"), format = "%B_%Y_%d")
  )
transposed_redo
# Second attempt, using lubridate instead of zoo. The column names only carry
# a month and a year, so they are parsed with my() rather than mdy(), which
# expects a day component. format() then turns the result back into a
# "Month_Year" label, so `date` ends up as a character column, not a Date.
transposed_redo_2 <- foreclosures |>
  pivot_longer(cols = -1, names_to = "date", values_to = "value") |>
  pivot_wider(names_from = zip, values_from = value, names_sort = TRUE) |>
  arrange(date) |>
  mutate(date = format(my(date), "%B_%Y"))
transposed_redo_2
# Narrow the transposed table down to zip code 20772 (per the original note,
# monthly values from July 2021 to October 2023): keep the month label and
# that zip's column, renamed "foreclosures".
# `date` is a "Month_Year" character label here, so it is parsed with my() for
# sorting only; that puts the rows in chronological order instead of
# alphabetical order by month name.
top_foreclosure_zip <- transposed_redo_2 |>
  select(date, foreclosures = `20772`) |>
  arrange(my(date))
top_foreclosure_zip
# Transpose the data set so each month is a row and each zip code is a column.
# We had a lot of questions for ChatGPT
# https://chat.openai.com/share/a3d690f6-baec-4fc7-beb2-4844daf93de6 about
# getting R to treat the first column (zip) as the id column.
# pivot_longer()/pivot_wider() replace the superseded gather()/spread();
# names_sort = TRUE and arrange() keep the sorted column and row order that
# spread() produced.
transposed_redo <- foreclosures |>
  pivot_longer(cols = -1, names_to = "Date", values_to = "Value") |>
  pivot_wider(names_from = zip, values_from = Value, names_sort = TRUE) |>
  arrange(Date) |>
  mutate(
    # zoo year-month value built from the "month_year" column names.
    month_year = as.yearmon(paste(Date, "01", sep = "_"), format = "%B_%Y_%d")
  )
transposed_redo
# Second attempt, using lubridate instead of zoo. The column names only carry
# a month and a year, so they are parsed with my() rather than mdy(), which
# expects a day component. format() then turns the result back into a
# "Month_Year" label, so `date` ends up as a character column, not a Date.
transposed_redo_2 <- foreclosures |>
  pivot_longer(cols = -1, names_to = "date", values_to = "value") |>
  pivot_wider(names_from = zip, values_from = value, names_sort = TRUE) |>
  arrange(date) |>
  mutate(date = format(my(date), "%B_%Y"))
transposed_redo_2
# Narrow the transposed table down to zip code 20772 (per the original note,
# monthly values from July 2021 to October 2023): keep the month label and
# that zip's column, renamed "foreclosures".
# `date` already arrives as a "Month_Year" label, so the earlier
# mdy() -> format(date, "%B_%Y") round trip only rebuilt the same text and is
# dropped. The label is parsed with my() for sorting only, which orders the
# rows chronologically instead of alphabetically by month name.
top_foreclosure_zip <- transposed_redo_2 |>
  select(date, foreclosures = `20772`) |>
  arrange(my(date))
top_foreclosure_zip
# Time to chart it! We should also look at the percent change of other zip
# codes.
# The earlier version failed with "Discrete value supplied to continuous
# scale" because `date` is a character label and the x axis was declared
# continuous. Parse the "Month_Year" label into a Date and use a date scale.
top_foreclosure_zip |>
  mutate(date = my(date)) |>
  ggplot() +
  geom_line(aes(x = date, y = foreclosures)) +
  scale_x_date(date_breaks = "1 month", date_labels = "%b %Y") +
  labs(
    title = "Foreclosure notices rose drastically in this PG County zip code",
    x = "month",
    y = "foreclosures notices issued",
    caption = "Source: Office of Financial Regulation"
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
# Transpose the data set so each month is a row and each zip code is a column.
# We had a lot of questions for ChatGPT
# https://chat.openai.com/share/a3d690f6-baec-4fc7-beb2-4844daf93de6 about
# getting R to treat the first column (zip) as the id column.
# pivot_longer()/pivot_wider() replace the superseded gather()/spread();
# names_sort = TRUE and arrange() keep the sorted column and row order that
# spread() produced.
transposed_redo <- foreclosures |>
  pivot_longer(cols = -1, names_to = "Date", values_to = "Value") |>
  pivot_wider(names_from = zip, values_from = Value, names_sort = TRUE) |>
  arrange(Date) |>
  mutate(
    # zoo year-month value built from the "month_year" column names.
    month_year = as.yearmon(paste(Date, "01", sep = "_"), format = "%B_%Y_%d")
  )
transposed_redo
# Second attempt, using lubridate instead of zoo. The column names only carry
# a month and a year, so they are parsed with my() rather than mdy(), which
# expects a day component. format() then turns the result back into a
# "Month_Year" label, so `date` ends up as a character column, not a Date.
transposed_redo_2 <- foreclosures |>
  pivot_longer(cols = -1, names_to = "date", values_to = "value") |>
  pivot_wider(names_from = zip, values_from = value, names_sort = TRUE) |>
  arrange(date) |>
  mutate(date = format(my(date), "%B_%Y"))
transposed_redo_2
# Narrow the transposed table down to zip code 20772 (per the original note,
# monthly values from July 2021 to October 2023): keep the month label and
# that zip's column, renamed "foreclosures", with a numeric "MM_YYYY" label.
# The earlier format string used "%M", which is minutes (always "00" for a
# date); "%m" is the two-digit month. Rows are sorted while `date` is still a
# Date so the order is chronological, then the label is formatted.
top_foreclosure_zip <- transposed_redo_2 |>
  select(date, foreclosures = `20772`) |>
  mutate(date = my(date)) |>
  arrange(date) |>
  mutate(date = format(date, "%m_%Y"))
top_foreclosure_zip
# Transpose the data set so each month is a row and each zip code is a column.
# We had a lot of questions for ChatGPT
# https://chat.openai.com/share/a3d690f6-baec-4fc7-beb2-4844daf93de6 about
# getting R to treat the first column (zip) as the id column.
# pivot_longer()/pivot_wider() replace the superseded gather()/spread();
# names_sort = TRUE and arrange() keep the sorted column and row order that
# spread() produced.
transposed_redo <- foreclosures |>
  pivot_longer(cols = -1, names_to = "Date", values_to = "Value") |>
  pivot_wider(names_from = zip, values_from = Value, names_sort = TRUE) |>
  arrange(Date) |>
  mutate(
    # zoo year-month value built from the "month_year" column names.
    month_year = as.yearmon(paste(Date, "01", sep = "_"), format = "%B_%Y_%d")
  )
transposed_redo
# Second attempt, using lubridate instead of zoo. The column names only carry
# a month and a year, so they are parsed with my() rather than mdy(), which
# expects a day component. format() then turns the result back into a
# "Month_Year" label, so `date` ends up as a character column, not a Date.
transposed_redo_2 <- foreclosures |>
  pivot_longer(cols = -1, names_to = "date", values_to = "value") |>
  pivot_wider(names_from = zip, values_from = value, names_sort = TRUE) |>
  arrange(date) |>
  mutate(date = format(my(date), "%B_%Y"))
transposed_redo_2
# Narrow the transposed table down to zip code 20772 (per the original note,
# monthly values from July 2021 to October 2023): keep the month label and
# that zip's column, renamed "foreclosures".
# `date` already arrives as a "Month_Year" label, so the earlier
# mdy() -> format(date, "%B_%Y") round trip only rebuilt the same text and is
# dropped. The label is parsed with my() for sorting only, which orders the
# rows chronologically instead of alphabetically by month name.
top_foreclosure_zip <- transposed_redo_2 |>
  select(date, foreclosures = `20772`) |>
  arrange(my(date))
top_foreclosure_zip
# Time to chart it! We should also look at the percent change of other zip
# codes.
# The earlier version failed with "Discrete value supplied to continuous
# scale" because `date` is a character label and the x axis was declared
# continuous. Parse the "Month_Year" label into a Date and use a date scale.
top_foreclosure_zip |>
  mutate(date = my(date)) |>
  ggplot() +
  geom_line(aes(x = date, y = foreclosures)) +
  scale_x_date(date_breaks = "1 month", date_labels = "%b %Y") +
  labs(
    title = "Foreclosure notices rose drastically in this PG County zip code",
    x = "month",
    y = "foreclosures notices issued",
    caption = "Source: Office of Financial Regulation"
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
# Transpose the data set so each month is a row and each zip code is a column.
# We had a lot of questions for ChatGPT
# https://chat.openai.com/share/a3d690f6-baec-4fc7-beb2-4844daf93de6 about
# getting R to treat the first column (zip) as the id column.
# pivot_longer()/pivot_wider() replace the superseded gather()/spread();
# names_sort = TRUE and arrange() keep the sorted column and row order that
# spread() produced.
transposed_redo <- foreclosures |>
  pivot_longer(cols = -1, names_to = "Date", values_to = "Value") |>
  pivot_wider(names_from = zip, values_from = Value, names_sort = TRUE) |>
  arrange(Date) |>
  mutate(
    # zoo year-month value built from the "month_year" column names.
    month_year = as.yearmon(paste(Date, "01", sep = "_"), format = "%B_%Y_%d")
  )
transposed_redo
# Second attempt, using lubridate instead of zoo. The column names only carry
# a month and a year, so they are parsed with my() rather than mdy(), which
# expects a day component. format() then turns the result back into a
# "Month_Year" label, so `date` ends up as a character column, not a Date.
transposed_redo_2 <- foreclosures |>
  pivot_longer(cols = -1, names_to = "date", values_to = "value") |>
  pivot_wider(names_from = zip, values_from = value, names_sort = TRUE) |>
  arrange(date) |>
  mutate(date = format(my(date), "%B_%Y"))
transposed_redo_2
# Narrow the transposed table down to zip code 20772 (per the original note,
# monthly values from July 2021 to October 2023): keep the month label and
# that zip's column, renamed "foreclosures", then export it.
# `date` already arrives as a "Month_Year" label, so the earlier
# mdy() -> format(date, "%B_%Y") round trip only rebuilt the same text and is
# dropped. The label is parsed with my() for sorting only, which orders the
# rows (and the exported CSV) chronologically instead of alphabetically.
top_foreclosure_zip <- transposed_redo_2 |>
  select(date, foreclosures = `20772`) |>
  arrange(my(date))
top_foreclosure_zip
write_csv(top_foreclosure_zip, "data/top_foreclosure_zip.csv")
# Transpose the data set so each month is a row and each zip code is a column.
# We had a lot of questions for ChatGPT
# https://chat.openai.com/share/a3d690f6-baec-4fc7-beb2-4844daf93de6 about
# getting R to treat the first column (zip) as the id column.
# pivot_longer()/pivot_wider() replace the superseded gather()/spread();
# names_sort = TRUE and arrange() keep the sorted column and row order that
# spread() produced.
transposed_redo <- foreclosures |>
  pivot_longer(cols = -1, names_to = "Date", values_to = "Value") |>
  pivot_wider(names_from = zip, values_from = Value, names_sort = TRUE) |>
  arrange(Date) |>
  mutate(
    # zoo year-month value built from the "month_year" column names.
    month_year = as.yearmon(paste(Date, "01", sep = "_"), format = "%B_%Y_%d")
  )
transposed_redo
# Second attempt, using lubridate instead of zoo. The column names only carry
# a month and a year, so they are parsed with my() rather than mdy(), which
# expects a day component. format() then turns the result back into a
# "Month_Year" label, so `date` ends up as a character column, not a Date.
transposed_redo_2 <- foreclosures |>
  pivot_longer(cols = -1, names_to = "date", values_to = "value") |>
  pivot_wider(names_from = zip, values_from = value, names_sort = TRUE) |>
  arrange(date) |>
  mutate(date = format(my(date), "%B_%Y"))
transposed_redo_2
# Narrow the transposed table down to zip code 20772 (per the original note,
# monthly values from July 2021 to October 2023): keep the month label and
# that zip's column, renamed "foreclosures", then export it.
# `date` already arrives as a "Month_Year" label, so the earlier
# mdy() -> format(date, "%B_%Y") round trip only rebuilt the same text and is
# dropped. The label is parsed with my() for sorting only, which orders the
# rows (and the exported CSV) chronologically instead of alphabetically.
top_foreclosure_zip <- transposed_redo_2 |>
  select(date, foreclosures = `20772`) |>
  arrange(my(date))
top_foreclosure_zip
write_csv(top_foreclosure_zip, "top_foreclosure_zip.csv")
# Disabled export of the per-zip totals, kept for reference.
#write_csv(foreclosures_sum, "foreclosure_sum.csv")
# NOTE(review): setwd() points at a user-specific absolute path, so the
# relative file names used elsewhere only resolve on a machine with this
# folder layout. Consider opening the project from its own directory instead.
setwd("~/Documents/GitHub/data_analysis")
knitr::opts_chunk$set(echo = TRUE)
#install.packages("zoo")
library(tidyverse)
library(dplyr)
library(janitor)
library(zoo)
library(ggplot2)
library(tidycensus)
library(lubridate)
# Load the notices-of-intent-to-foreclose table and tidy its column names.
# After clean_names(), the column called february_2020 is renamed to
# february_2022.
foreclosures <- rename(
  clean_names(read_csv("Maryland_Notices_of_Intent_to_Foreclose.csv")),
  february_2022 = february_2020
)
foreclosures
# Total notices per zip code across every column whose name contains "_"
# (the month columns), largest first. We wanted a quick map of this rather
# than just a table of numbers, so the totals are also written to CSV.
foreclosures_sum <- arrange(
  summarise(
    group_by(foreclosures, zip),
    foreclosures_sum = sum(across(contains("_")))
  ),
  desc(foreclosures_sum)
)
foreclosures_sum
write_csv(foreclosures_sum, "foreclosure_sum.csv")
# Print the ranking once more, sorted from most to fewest notices.
arrange(foreclosures_sum, desc(foreclosures_sum))
# Transpose the data set so each month is a row and each zip code is a column.
# We had a lot of questions for ChatGPT
# https://chat.openai.com/share/a3d690f6-baec-4fc7-beb2-4844daf93de6 about
# getting R to treat the first column (zip) as the id column.
# pivot_longer()/pivot_wider() replace the superseded gather()/spread();
# names_sort = TRUE and arrange() keep the sorted column and row order that
# spread() produced.
transposed_redo <- foreclosures |>
  pivot_longer(cols = -1, names_to = "Date", values_to = "Value") |>
  pivot_wider(names_from = zip, values_from = Value, names_sort = TRUE) |>
  arrange(Date) |>
  mutate(
    # zoo year-month value built from the "month_year" column names.
    month_year = as.yearmon(paste(Date, "01", sep = "_"), format = "%B_%Y_%d")
  )
transposed_redo
# Second attempt, using lubridate instead of zoo. This version keeps `date` as
# a real Date set to the first day of each month. The column names only carry
# a month and a year, so my() parses them directly; it replaces the earlier
# mdy() + floor_date(date, "month") pair, since mdy() expects a day component.
transposed_redo_2 <- foreclosures |>
  pivot_longer(cols = -1, names_to = "date", values_to = "value") |>
  pivot_wider(names_from = zip, values_from = value, names_sort = TRUE) |>
  arrange(date) |>
  mutate(date = my(date))
transposed_redo_2
# Narrow the transposed table down to zip code 20772 (per the original note,
# monthly values from July 2021 to October 2023): keep `date` and that zip's
# column, renamed "foreclosures", sort by `date`, and export the result.
top_foreclosure_zip <- arrange(
  select(transposed_redo_2, date, foreclosures = `20772`),
  date
)
top_foreclosure_zip
write_csv(top_foreclosure_zip, "top_foreclosure_zip.csv")
# Time to chart it! We should also look at the percent change of other zip
# codes. This version draws the line without declaring an x scale, so the
# earlier "Discrete value supplied to continuous scale" error no longer
# applies.
ggplot(top_foreclosure_zip, aes(x = date, y = foreclosures)) +
  geom_line() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  labs(
    title = "Foreclosure notices rose drastically in this PG County zip code",
    x = "month",
    y = "foreclosures notices issued",
    caption = "Source: Office of Financial Regulation"
  )
# Optional point labels, left switched off:
#  geom_text(check_overlap = TRUE, size = 2, aes(label = date), hjust = 0.5, vjust = -0.5)
# I want to compare the zip-level chart to the overall pattern, so build
# monthly totals across every zip code.
# The earlier version dropped `date` here. That made the `[, -1]` below skip
# the first zip column instead of the date, and left the final
# select(date, ...) with no date column to pick. Keep `date`, at the front.
transposed_redo_arranged <- transposed_redo_2 |>
  relocate(date)
transposed_redo_arranged
# Add sum_col: the row-wise total of every column except `date`.
col_sum <- transposed_redo_arranged |>
  mutate(sum_col = rowSums(pick(-date)))
# Move the new total from the end of the table to the front.
sum_arranged <- col_sum |>
  relocate(sum_col)
sum_arranged
# Double check the first row: total of every column after `date`.
rowSums(transposed_redo_arranged[1, -1])
# Average per month.
# NOTE(review): 552 is hard-coded; presumably it is the number of zip code
# columns. Confirm it against the data before relying on `average`.
average_foreclosures_month <- sum_arranged |>
  mutate(average = sum_col / 552) |>
  select(date, sum_col, average) |>
  arrange(desc(average))
# I want to compare the zip-level chart to the overall pattern, so build
# monthly totals across every zip code.
# The earlier version dropped `date` here. That made the `[, -1]` below skip
# the first zip column instead of the date, and left the final
# select(date, ...) with no date column to pick. Keep `date`, at the front.
transposed_redo_arranged <- transposed_redo_2 |>
  relocate(date)
transposed_redo_arranged
# Add sum_col: the row-wise total of every column except `date`.
col_sum <- transposed_redo_arranged |>
  mutate(sum_col = rowSums(pick(-date)))
# Move the new total from the end of the table to the front.
sum_arranged <- col_sum |>
  relocate(sum_col)
sum_arranged
# Double check the first row: total of every column after `date`.
rowSums(transposed_redo_arranged[1, -1])
# Average per month.
# NOTE(review): 552 is hard-coded; presumably it is the number of zip code
# columns. Confirm it against the data before relying on `average`.
average_foreclosures_month <- sum_arranged |>
  mutate(average = sum_col / 552) |>
  select(date, sum_col, average) |>
  arrange(desc(average))
# Sanity check: total of the first row, leaving out the first column.
transposed_redo_arranged[1, -1] |>
  rowSums()
# Pull the 2021 ACS estimate for variable B02001_001 (used here as
# population) for every ZCTA; no state filter is passed to get_acs().
# Rename geoid, estimate and moe to zip, population and moe_pop, then sort
# from largest to smallest population.
# The earlier group_by(zip) is dropped: nothing was summarised by group, and
# it left mdzip as a grouped tibble.
mdzip <- get_acs(geography = "zcta", variables = "B02001_001", year = 2021) |>
  clean_names() |>
  rename(zip = geoid, population = estimate, moe_pop = moe) |>
  arrange(desc(population))
mdzip
# Attach population to the per-zip foreclosure totals: left-join
# foreclosures_sum to mdzip on zip, then keep only zip, foreclosures_sum,
# population and moe_pop.
# NOTE(review): the join needs zip to have the same type on both sides;
# confirm the CSV's zip column matches the Census geoid type.
foreclosure_state_pop <- foreclosures_sum |>
  left_join(mdzip, join_by(zip)) |>
  select(zip, foreclosures_sum, population, moe_pop)
foreclosure_state_pop
# Pull median household income by ZCTA from the 2021 ACS.
# B19013_001 is the median household income estimate. The earlier code used
# B05010_001, the total of a children-by-poverty-ratio table, which is a
# count of children rather than a dollar amount.
# Rename geoid, estimate and moe to zip, income and moe_inc, then sort from
# highest to lowest income. The earlier group_by(zip) is dropped because
# nothing was summarised by group.
md_income <- get_acs(geography = "zcta", variables = "B19013_001", year = 2021) |>
  clean_names() |>
  rename(zip = geoid, income = estimate, moe_inc = moe) |>
  arrange(desc(income))
md_income
# Add income to the foreclosure/population table by joining on zip, keeping
# only the six analysis columns.
foreclosure_pop_income <- foreclosure_state_pop |>
  left_join(md_income, join_by(zip)) |>
  select(zip, foreclosures_sum, population, moe_pop, income, moe_inc)
foreclosure_pop_income
# Foreclosure notices per 10,000 residents for each zip code.
per_capita <- mutate(
  foreclosure_pop_income,
  per_capita = (foreclosures_sum / population) * 10000
)
per_capita
# Notices per 10,000 residents, highest rate first.
per_capita <- arrange(
  mutate(
    foreclosure_pop_income,
    per_capita = (foreclosures_sum / population) * 10000
  ),
  desc(per_capita)
)
per_capita
# Same ranking, expressed per 1,000 residents instead.
per_capita <- arrange(
  mutate(
    foreclosure_pop_income,
    per_capita = (foreclosures_sum / population) * 1000
  ),
  desc(per_capita)
)
per_capita
# Per-1,000 rate, limited to zip codes with at least 20,000 residents.
large_zips <- filter(foreclosure_pop_income, population >= 20000)
per_capita <- arrange(
  mutate(large_zips, per_capita = (foreclosures_sum / population) * 1000),
  desc(per_capita)
)
rm(large_zips)
per_capita
# Per-10,000 rate, limited to zip codes with at least 20,000 residents.
per_capita <- arrange(
  mutate(
    filter(foreclosure_pop_income, population >= 20000),
    per_capita = (foreclosures_sum / population) * 10000
  ),
  desc(per_capita)
)
per_capita
# Per-10,000 rate for every zip code; the population floor is switched off.
#  filter(population >= 20000) |>
per_capita <- arrange(
  mutate(
    foreclosure_pop_income,
    per_capita = (foreclosures_sum / population) * 10000
  ),
  desc(per_capita)
)
per_capita
# Scatter of income against the foreclosure notice rate, labelled by zip.
# The y axis plots `per_capita` (notices per 10,000 residents), so the label
# now says that instead of "foreclosure sum".
# NOTE(review): the caption credits the Maryland Department of the
# Environment, while the time-series chart credits the Office of Financial
# Regulation for the same notices; confirm the right source.
ggplot(per_capita, aes(x = income, y = per_capita)) +
  geom_point() +
  labs(
    title = "Test",
    x = "Median Income",
    y = "foreclosure notices per 10,000 residents",
    caption = "source: Maryland Department of the Environment, U.S. Census"
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  geom_text(
    aes(label = zip),
    check_overlap = TRUE, size = 2, hjust = 0.5, vjust = -0.5
  )
# Scatter of income against total notices per zip, with zip labels.
per_capita |>
  ggplot(aes(x = income, y = foreclosures_sum)) +
  geom_point() +
  geom_text(
    aes(label = zip),
    check_overlap = TRUE, size = 2, hjust = 0.5, vjust = -0.5
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  labs(
    title = "Test",
    x = "Median Income",
    y = "foreclosure sum",
    caption = "source: Maryland Department of the Environment, U.S. Census"
  )
# Income vs. total notices with a fitted linear trend line, followed by a
# correlation test on the same two columns.
ggplot(per_capita, aes(x = income, y = foreclosures_sum)) +
  geom_point() +
  geom_smooth(method = "lm")
cor.test(per_capita$income, per_capita$foreclosures_sum)
# Income vs. total notices scatter, this time without the zip labels.
per_capita |>
  ggplot(aes(x = income, y = foreclosures_sum)) +
  geom_point() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  labs(
    title = "Test",
    x = "Median Income",
    y = "foreclosure sum",
    caption = "source: Maryland Department of the Environment, U.S. Census"
  )
# Zip labels are left switched off for this version:
#    geom_text(check_overlap = TRUE, size = 2, aes(label = zip), hjust = 0.5, vjust = -0.5)
# Scatter of income against total notices per zip, with zip labels.
per_capita |>
  ggplot(aes(x = income, y = foreclosures_sum)) +
  geom_point() +
  geom_text(
    aes(label = zip),
    check_overlap = TRUE, size = 2, hjust = 0.5, vjust = -0.5
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  labs(
    title = "Test",
    x = "Median Income",
    y = "foreclosure sum",
    caption = "source: Maryland Department of the Environment, U.S. Census"
  )
# Rebuild the rate table with the rate column named `percapita` (notices per
# 10,000 residents), highest rate first. The population floor stays off.
# The sort now names the new `percapita` column; the earlier code sorted on
# `per_capita`, which is not a column of this table.
#  filter(population >= 20000) |>
per_capita <- foreclosure_pop_income |>
  mutate(percapita = (foreclosures_sum / population) * 10000) |>
  arrange(desc(percapita))
per_capita
# Scatter of income against total notices per zip, with zip labels.
per_capita |>
  ggplot(aes(x = income, y = foreclosures_sum)) +
  geom_point() +
  geom_text(
    aes(label = zip),
    check_overlap = TRUE, size = 2, hjust = 0.5, vjust = -0.5
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  labs(
    title = "Test",
    x = "Median Income",
    y = "foreclosure sum",
    caption = "source: Maryland Department of the Environment, U.S. Census"
  )
# Scatter of income against the foreclosure notice rate, labelled by zip.
# The rate column was renamed to `percapita` when the table was rebuilt, so
# the y aesthetic uses that name; `per_capita` is now only the data frame.
# The y label describes the rate instead of saying "foreclosure sum".
ggplot(per_capita, aes(x = income, y = percapita)) +
  geom_point() +
  labs(
    title = "Test",
    x = "Median Income",
    y = "foreclosure notices per 10,000 residents",
    caption = "source: Maryland Department of the Environment, U.S. Census"
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  geom_text(
    aes(label = zip),
    check_overlap = TRUE, size = 2, hjust = 0.5, vjust = -0.5
  )
# Scatter of income against the foreclosure notice rate, labelled by zip.
# The y axis plots `percapita` (notices per 10,000 residents), so the label
# now says that instead of "foreclosure sum".
ggplot(per_capita, aes(x = income, y = percapita)) +
  geom_point() +
  labs(
    title = "Test",
    x = "Median Income",
    y = "foreclosure notices per 10,000 residents",
    caption = "source: Maryland Department of the Environment, U.S. Census"
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  geom_text(
    aes(label = zip),
    check_overlap = TRUE, size = 2, hjust = 0.5, vjust = -0.5
  )
# Scatter of the notice rate against total notices per zip, labelled by zip.
# The x axis plots `percapita` (notices per 10,000 residents), so the label
# now says that instead of "Median Income".
ggplot(per_capita, aes(x = percapita, y = foreclosures_sum)) +
  geom_point() +
  labs(
    title = "Test",
    x = "foreclosure notices per 10,000 residents",
    y = "foreclosure sum",
    caption = "source: Maryland Department of the Environment, U.S. Census"
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  geom_text(
    aes(label = zip),
    check_overlap = TRUE, size = 2, hjust = 0.5, vjust = -0.5
  )
# Scatter with the notice rate on x and income on y, labelled by zip.
per_capita |>
  ggplot(aes(x = percapita, y = income)) +
  geom_point() +
  geom_text(
    aes(label = zip),
    check_overlap = TRUE, size = 2, hjust = 0.5, vjust = -0.5
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  labs(
    title = "Test",
    x = "per capita",
    y = "median income",
    caption = "source: Maryland Department of the Environment, U.S. Census"
  )
# Same rate-vs-income scatter with a more descriptive x-axis label.
per_capita |>
  ggplot(aes(x = percapita, y = income)) +
  geom_point() +
  geom_text(
    aes(label = zip),
    check_overlap = TRUE, size = 2, hjust = 0.5, vjust = -0.5
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  labs(
    title = "Test",
    x = "foreclosures per capita",
    y = "median income",
    caption = "source: Maryland Department of the Environment, U.S. Census"
  )
# Axes flipped: income on x, notice rate on y, labelled by zip.
per_capita |>
  ggplot(aes(x = income, y = percapita)) +
  geom_point() +
  geom_text(
    aes(label = zip),
    check_overlap = TRUE, size = 2, hjust = 0.5, vjust = -0.5
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  labs(
    title = "Test",
    x = "income",
    y = "foreclosures per capita",
    caption = "source: Maryland Department of the Environment, U.S. Census"
  )
# Income vs. notice rate with a fitted linear trend line.
ggplot(per_capita, aes(x = income, y = percapita)) +
  geom_point() +
  geom_smooth(method = "lm")
# Correlation between income and the notice rate, run with the arguments in
# both orders.
cor.test(per_capita$income, per_capita$percapita)
cor.test(per_capita$percapita, per_capita$income)
cor.test(per_capita$income, per_capita$percapita)
# Now for the adding: monthly totals for the 2022 table.
# Move the last column of transposed_foreclosures_2022 to the front, drop
# Date, and sort by month_year.
final_foreclosures_2022 <- transposed_foreclosures_2022 |>
  relocate(last_col()) |>
  select(-Date) |>
  arrange(month_year)
final_foreclosures_2022
# Row-wise total of every column after the first one.
col_sum_2022_months <- mutate(
  final_foreclosures_2022,
  sum_of_notices = rowSums(select(final_foreclosures_2022, -1))
)
# Keep just the month and its total.
foreclosures_by_month_2022 <- select(
  col_sum_2022_months,
  month_year, sum_of_notices
)
foreclosures_by_month_2022