library(dplyr)
library(ggplot2)
library(gapminder)
library(openxlsx)
library(plotly)
library(htmlwidgets)
file_path <- "C:/Users/Owner/Downloads/Post Performance(HCHSA).xlsx"
Post_Performance <- read.xlsx(file_path, sheet = 1, startRow = 1, colNames = TRUE)
# Changing the Date Column to display a Date str instead of num
Post_Performance$Date <- as.Date(Post_Performance$Date - 25569, origin = "1970-01-01")
# Convert Date to a Date-Time object if not already
Post_Performance$Date <- as.POSIXct(Post_Performance$Date)
# Creating a Day and Hour variable
Post_Performance$Day <- weekdays(Post_Performance$Date)
Post_Performance$Hour <- format(Post_Performance$Date, "%H")
# First 6 rows of data
head.matrix(Post_Performance)
## Date Month_Num Year Month_Text Time Day
## 1 2022-12-08 13:09:59 12 2022 December 13:10 Thursday
## 2 2022-12-08 15:04:59 12 2022 December 15:05 Thursday
## 3 2022-12-08 18:36:59 12 2022 December 18:37 Thursday
## 4 2022-12-08 18:43:59 12 2022 December 18:44 Thursday
## 5 2022-12-09 07:37:00 12 2022 December 07:37 Friday
## 6 2022-12-09 12:17:59 12 2022 December 12:18 Friday
## Post.ID Network Post.Type Content.Type
## 1 171169320474_514481674044094 Facebook Post Photo
## 2 1600959658579075086 Twitter Tweet Link
## 3 171169320474_514626447362950 Facebook Post Photo
## 4 17925232292624964 Instagram Post Carousel
## 5 1601209510361976834 Twitter Tweet Photo
## 6 17851397822877793 Instagram Post Carousel
## Profile
## 1 HarrisCountyHoustonSportsAuthority
## 2 @HCHSA
## 3 HarrisCountyHoustonSportsAuthority
## 4 hc_hsa
## 5 @HCHSA
## 6 hc_hsa
## Link
## 1 https://www.facebook.com/262096672615930/posts/514481674044094
## 2 https://twitter.com/HCHSA/status/1600959658579075086
## 3 https://www.facebook.com/262096672615930/posts/514626447362950
## 4 https://www.instagram.com/p/Cl7XKUepDe3/
## 5 https://twitter.com/HCHSA/status/1601209510361976834
## 6 https://www.instagram.com/p/Cl9Pv0VJ3st/
## Linked.Content
## 1 https://www.facebook.com/262096672615930/posts/514481674044094
## 2 https://hchsa.info/3Bjy56C
## 3 https://www.facebook.com/262096672615930/posts/514626447362950
## 4 No Link In Post
## 5 https://twitter.com/HCHSA/status/1601209510361976834/photo/1, https://twitter.com/HCHSA/status/1601209510361976834/photo/1
## 6 No Link In Post
## Impressions Organic.Impressions Paid.Impressions Reach Organic.Reach
## 1 1223 1223 0 1214 1214
## 2 189 189 0 NA NA
## 3 852 852 0 852 852
## 4 962 962 0 806 806
## 5 1024 1024 0 NA NA
## 6 333 333 0 262 262
## Potential.Reach Engagement.Rate.(per.Impression) Engagements Reactions Likes
## 1 NA 0.1055 129 21 16
## 2 4580 0.0741 14 0 0
## 3 NA 0.0634 54 10 7
## 4 NA 0.0644 62 62 62
## 5 19759 0.0303 31 12 12
## 6 NA 0.0601 20 19 19
## Comments Shares Saves Post.Link.Clicks Other.Post.Clicks Post.Clicks.(All)
## 1 2 1 NA 3 102 105
## 2 0 0 NA 8 6 14
## 3 0 0 NA NA 44 44
## 4 0 NA 0 NA NA NA
## 5 0 2 NA 1 16 17
## 6 1 NA 0 NA NA NA
## Other.Engagements Video.Views Story.Taps.Forward Story.Exits Story.Replies
## 1 NA NA NA NA NA
## 2 0 NA NA NA NA
## 3 NA NA NA NA NA
## 4 NA 0 NA NA NA
## 5 0 NA NA NA NA
## 6 NA 0 NA NA NA
## Hour
## 1 13
## 2 15
## 3 18
## 4 18
## 5 07
## 6 12
Post_Performance$Date <- as.Date(Post_Performance$Date, format = "%Y-%m-%d")
#Engagements Overtime
ggplot(Post_Performance, aes(x = Date, y = Engagements)) +
geom_line() +
scale_x_date(date_breaks = "2 month", date_labels = "%b %Y") +
labs(title = "Total Engagement Over Time", x = "Date", y = "Total Engagement")
# Likes by Network and Content Type
ggplot(Post_Performance, aes(x = Network, y = Likes, fill = Content.Type)) +
geom_bar(stat = "identity", position = position_dodge()) +
labs(title = "Likes by Network and Content Type")
# Comments by Network and Content Type
ggplot(Post_Performance, aes(x = Network, y = Comments, fill = Content.Type)) +
geom_bar(stat = "identity", position = position_dodge()) +
labs(title = "Comments by Network and Content Type")
# Shares by Network and Content Type
ggplot(Post_Performance, aes(x = Network, y = Shares, fill = Content.Type)) +
geom_bar(stat = "identity", position = position_dodge()) +
labs(title = "Shares by Network and Content Type")
# Engagement by Day of the Week
ggplot(Post_Performance, aes(x = Day, y = Engagements)) +
geom_bar(stat = "identity") +
labs(title = "Total Engagement by Day of the Week")
# Engagement by Hour of the Day
ggplot(Post_Performance, aes(x = Hour, y = Engagements)) +
geom_bar(stat = "identity",
fill = "steelblue") +
labs(title = "Total Engagement by Hour of the Day")
# Aggregating total Impressions by day of the Week
day_impressions <- Post_Performance %>%
group_by(Day) %>%
summarize(Impressions = sum(Impressions, na.rm = TRUE)) %>%
arrange(desc(Impressions))
day_impressions
## # A tibble: 7 × 2
## Day Impressions
## <chr> <dbl>
## 1 Thursday 1599651
## 2 Tuesday 1467257
## 3 Monday 971501
## 4 Wednesday 683118
## 5 Friday 363959
## 6 Sunday 185235
## 7 Saturday 176124
day_hour_data <- Post_Performance %>% group_by(Day, Hour) %>% dplyr::summarize(Engagements =n())
## `summarise()` has grouped output by 'Day'. You can override using the `.groups`
## argument.
ggplot(day_hour_data, aes(Day, Hour, fill = Engagements)) +
geom_tile(color = "white") +
ggtitle("Enagements Heat Map By Hour and Day")
# Graph analyzing the engagement rate per impression for each Network. Engagement Rate=Engagements/Impressions (%)
Post_Performance$EngagementRate <- Post_Performance$Engagements / Post_Performance$Impressions
ggplot(Post_Performance, aes(x = Network, y = EngagementRate, fill = Network)) +
geom_boxplot() +
labs(title = "Engagement Rate by Network", x = "Network", y = "Engagement Rate %")
#Popularity by Content Types
ggplot(Post_Performance, aes(x = Content.Type, fill = Content.Type)) +
geom_bar() +
labs(title = "Popularity of Content Types", x = "Content Type", y = "Count")
# Top 5 posts based on Total Engagement
top_posts <- Post_Performance[order(-Post_Performance$Engagements),][1:5,]
# Top 5 performing posts from Dec 08, 2022 - Dec 09, 2023
# Rank column from my top_posts Vector to better distinguish label names
top_posts$Rank <- paste("Rank", seq_along(top_posts$Post.ID))
# Converting rank to a factor and set levels in the order I want to make things simpler to read
top_posts$Rank <- factor(top_posts$Rank, levels = paste("Rank", 5:1))
# Using Plotly library, I will distinguish the bar colors based on Network to see which network claims the most top performances
p <- plot_ly(top_posts, x = ~Rank, y = ~Engagements, type = 'bar',
text = ~Link, hoverinfo = 'text', color = ~Network)
p <- p %>% layout(xaxis = list(title = 'Rank'),
yaxis = list(title = 'Engagements'),
title = 'Top 5 Performing Posts Dec 22-Dec 23')
p <- p %>% onRender("
function(el, x) {
el.on('plotly_click', function(data) {
var point = data.points[0];
if(point) {
window.open(point.text);
}
});
}
")
# Results for Top 5 posts across all Networks (Facebook, Twitter, IG, and LinkedIn)
p