All Packages needed for the project

library(dplyr)
library(ggplot2)
library(gapminder)
library(openxlsx)
library(plotly)
library(htmlwidgets)

Data Preparation and Transforming

file_path <- "C:/Users/Owner/Downloads/Post Performance(HCHSA).xlsx"
Post_Performance <- read.xlsx(file_path, sheet = 1, startRow = 1, colNames = TRUE)
# Changing the Date Column to display a Date str instead of num
Post_Performance$Date <- as.Date(Post_Performance$Date - 25569, origin = "1970-01-01")
# Convert Date to a Date-Time object if not already
Post_Performance$Date <- as.POSIXct(Post_Performance$Date)
# Creating a Day and Hour variable
Post_Performance$Day <- weekdays(Post_Performance$Date)
Post_Performance$Hour <- format(Post_Performance$Date, "%H")
# First 6 rows of data
head.matrix(Post_Performance)
##                  Date Month_Num Year Month_Text  Time      Day
## 1 2022-12-08 13:09:59        12 2022   December 13:10 Thursday
## 2 2022-12-08 15:04:59        12 2022   December 15:05 Thursday
## 3 2022-12-08 18:36:59        12 2022   December 18:37 Thursday
## 4 2022-12-08 18:43:59        12 2022   December 18:44 Thursday
## 5 2022-12-09 07:37:00        12 2022   December 07:37   Friday
## 6 2022-12-09 12:17:59        12 2022   December 12:18   Friday
##                        Post.ID   Network Post.Type Content.Type
## 1 171169320474_514481674044094  Facebook      Post        Photo
## 2          1600959658579075086   Twitter     Tweet         Link
## 3 171169320474_514626447362950  Facebook      Post        Photo
## 4            17925232292624964 Instagram      Post     Carousel
## 5          1601209510361976834   Twitter     Tweet        Photo
## 6            17851397822877793 Instagram      Post     Carousel
##                              Profile
## 1 HarrisCountyHoustonSportsAuthority
## 2                             @HCHSA
## 3 HarrisCountyHoustonSportsAuthority
## 4                             hc_hsa
## 5                             @HCHSA
## 6                             hc_hsa
##                                                             Link
## 1 https://www.facebook.com/262096672615930/posts/514481674044094
## 2           https://twitter.com/HCHSA/status/1600959658579075086
## 3 https://www.facebook.com/262096672615930/posts/514626447362950
## 4                       https://www.instagram.com/p/Cl7XKUepDe3/
## 5           https://twitter.com/HCHSA/status/1601209510361976834
## 6                       https://www.instagram.com/p/Cl9Pv0VJ3st/
##                                                                                                               Linked.Content
## 1                                                             https://www.facebook.com/262096672615930/posts/514481674044094
## 2                                                                                                 https://hchsa.info/3Bjy56C
## 3                                                             https://www.facebook.com/262096672615930/posts/514626447362950
## 4                                                                                                            No Link In Post
## 5 https://twitter.com/HCHSA/status/1601209510361976834/photo/1, https://twitter.com/HCHSA/status/1601209510361976834/photo/1
## 6                                                                                                            No Link In Post
##   Impressions Organic.Impressions Paid.Impressions Reach Organic.Reach
## 1        1223                1223                0  1214          1214
## 2         189                 189                0    NA            NA
## 3         852                 852                0   852           852
## 4         962                 962                0   806           806
## 5        1024                1024                0    NA            NA
## 6         333                 333                0   262           262
##   Potential.Reach Engagement.Rate.(per.Impression) Engagements Reactions Likes
## 1              NA                           0.1055         129        21    16
## 2            4580                           0.0741          14         0     0
## 3              NA                           0.0634          54        10     7
## 4              NA                           0.0644          62        62    62
## 5           19759                           0.0303          31        12    12
## 6              NA                           0.0601          20        19    19
##   Comments Shares Saves Post.Link.Clicks Other.Post.Clicks Post.Clicks.(All)
## 1        2      1    NA                3               102               105
## 2        0      0    NA                8                 6                14
## 3        0      0    NA               NA                44                44
## 4        0     NA     0               NA                NA                NA
## 5        0      2    NA                1                16                17
## 6        1     NA     0               NA                NA                NA
##   Other.Engagements Video.Views Story.Taps.Forward Story.Exits Story.Replies
## 1                NA          NA                 NA          NA            NA
## 2                 0          NA                 NA          NA            NA
## 3                NA          NA                 NA          NA            NA
## 4                NA           0                 NA          NA            NA
## 5                 0          NA                 NA          NA            NA
## 6                NA           0                 NA          NA            NA
##   Hour
## 1   13
## 2   15
## 3   18
## 4   18
## 5   07
## 6   12

Data Visualization

Post_Performance$Date <- as.Date(Post_Performance$Date, format = "%Y-%m-%d")
#Engagements Overtime
ggplot(Post_Performance, aes(x = Date, y = Engagements)) +
  geom_line() +
  scale_x_date(date_breaks = "2 month", date_labels = "%b %Y") +
  labs(title = "Total Engagement Over Time", x = "Date", y = "Total Engagement")

# Likes by Network and Content Type
ggplot(Post_Performance, aes(x = Network, y = Likes, fill = Content.Type)) +
  geom_bar(stat = "identity", position = position_dodge()) +
  labs(title = "Likes by Network and Content Type")

# Comments by Network and Content Type
ggplot(Post_Performance, aes(x = Network, y = Comments, fill = Content.Type)) +
  geom_bar(stat = "identity", position = position_dodge()) +
  labs(title = "Comments by Network and Content Type")

# Shares by Network and Content Type
ggplot(Post_Performance, aes(x = Network, y = Shares, fill = Content.Type)) +
  geom_bar(stat = "identity", position = position_dodge()) +
  labs(title = "Shares by Network and Content Type")

# Engagement by Day of the Week
ggplot(Post_Performance, aes(x = Day, y = Engagements)) +
  geom_bar(stat = "identity") +
  labs(title = "Total Engagement by Day of the Week")

# Engagement by Hour of the Day
ggplot(Post_Performance, aes(x = Hour, y = Engagements)) +
  geom_bar(stat = "identity",
           fill = "steelblue") +
  labs(title = "Total Engagement by Hour of the Day")

# Aggregating total Impressions by day of the Week
day_impressions <- Post_Performance %>% 
  group_by(Day) %>% 
  summarize(Impressions = sum(Impressions, na.rm = TRUE)) %>%
  arrange(desc(Impressions))
day_impressions
## # A tibble: 7 × 2
##   Day       Impressions
##   <chr>           <dbl>
## 1 Thursday      1599651
## 2 Tuesday       1467257
## 3 Monday         971501
## 4 Wednesday      683118
## 5 Friday         363959
## 6 Sunday         185235
## 7 Saturday       176124
day_hour_data <- Post_Performance %>% group_by(Day, Hour) %>% dplyr::summarize(Engagements =n())
## `summarise()` has grouped output by 'Day'. You can override using the `.groups`
## argument.
ggplot(day_hour_data, aes(Day, Hour, fill = Engagements)) +
  geom_tile(color = "white") +
  ggtitle("Enagements Heat Map By Hour and Day")

# Graph analyzing the engagement rate per impression for each Network. Engagement Rate=Engagements/Impressions (%)
Post_Performance$EngagementRate <- Post_Performance$Engagements / Post_Performance$Impressions
ggplot(Post_Performance, aes(x = Network, y = EngagementRate, fill = Network)) +
  geom_boxplot() +
  labs(title = "Engagement Rate by Network", x = "Network", y = "Engagement Rate %")

#Popularity by Content Types
ggplot(Post_Performance, aes(x = Content.Type, fill = Content.Type)) +
  geom_bar() +
  labs(title = "Popularity of Content Types", x = "Content Type", y = "Count")

Visualizing the Top 5 Performing Posts

# Top 5 posts based on Total Engagement
top_posts <- Post_Performance[order(-Post_Performance$Engagements),][1:5,]
# Top 5 performing posts from Dec 08, 2022 - Dec 09, 2023
# Rank column from my top_posts Vector to better distinguish label names
top_posts$Rank <- paste("Rank", seq_along(top_posts$Post.ID))
# Converting rank to a factor and set levels in the order I want to make things simpler to read
top_posts$Rank <- factor(top_posts$Rank, levels = paste("Rank", 5:1))
# Using Plotly library, I will distinguish the bar colors based on Network to see which network claims the most top performances
p <- plot_ly(top_posts, x = ~Rank, y = ~Engagements, type = 'bar', 
             text = ~Link, hoverinfo = 'text', color = ~Network)
p <- p %>% layout(xaxis = list(title = 'Rank'),
                  yaxis = list(title = 'Engagements'),
                  title = 'Top 5 Performing Posts Dec 22-Dec 23')

p <- p %>% onRender("
  function(el, x) {
    el.on('plotly_click', function(data) {
      var point = data.points[0];
      if(point) {
        window.open(point.text);
      }
    });
  }
")
# Results for Top 5 posts across all Networks (Facebook, Twitter, IG, and LinkedIn)
p