clean_text_for_cloud <- function(string) {
  # Prepares text for word-cloud construction: strips digits, English stop
  # words, character names, and a few overly common "nuisance" words, then
  # collapses the runs of whitespace those removals leave behind.
  #
  # Args:
  #   string: character vector of text to clean. Matching is case-sensitive
  #     and the character/nuisance lists below are lowercase, so the text is
  #     presumably lowercased before it gets here -- TODO confirm at callers.
  #
  # Returns:
  #   `string` with the unwanted tokens removed.

  # Character names; if they aren't deleted, they clog up the word cloud
  blc_characters <- c(
    'araun', 'aria', 'astrid', 'bahamut', 'ben', 'burhalla', 'brisa',
    'cabot', 'cal', 'celeste', 'cino', 'coco', 'cuicatl', 'curio',
    'cynthian', 'dave', 'dill', 'drake', 'echo', 'esper', 'icetales',
    'inno', 'ivy', 'kate', 'koa', 'kora', 'lexx', 'luz', 'melanie',
    'mergo', 'mellow', 'mia', 'myles', 'nate', 'nero', 'nip', 'owen',
    'pana', 'petram', 'pleo', 'polaris', 'puddle', 'raven', 'rocky',
    'saltriv', 'scraggy', 'scrafty', 'seyka', 'shiron', 'skara', 'starr',
    'soul', 'tempest', 'tricky', 'vix', 'zane'
  )

  # Uninteresting, common words that float to the top
  # Let the record show that the actual most-used non-stop word in BLC was "like"
  nuisance_words <- c('like', 'just', 'said')

  # Deletes every whole-word occurrence of each entry in `words`, one word at
  # a time and in order. Iterating over the words themselves (rather than
  # 1:length(words)) makes an empty word list a no-op instead of producing
  # the indices 1 and 0.
  remove_words <- function(text, words) {
    for (word in words) {
      text <- str_replace_all(text, paste0('\\b', word, '\\b'), '')
    }
    text
  }

  # Eliminate numbers first
  clean_string <- str_replace_all(string, '[:digit:]', '')

  # stopwords() is evaluated once here rather than on every loop iteration
  clean_string <- remove_words(clean_string, stopwords('english'))
  clean_string <- remove_words(clean_string, blc_characters)
  clean_string <- remove_words(clean_string, nuisance_words)

  # Zap spaces introduced by the above steps
  str_replace_all(clean_string, '[:space:]{2,}', ' ')
}
create_user_doc <- function(data, user) {
  # Builds one word-cloud-ready document for a single user: keeps the rows of
  # `data` whose `username` equals `user`, glues their `clean_post` values
  # together with single spaces, and runs the result through
  # clean_text_for_cloud().
  user_rows <- filter(data, username == user)
  combined_text <- paste(user_rows$clean_post, collapse = ' ')
  clean_text_for_cloud(combined_text)
}
extract_freqs <- function(dtm, row) {
  # Takes a document term matrix and a row number (which indexes a document).
  # Returns a data frame with columns `term` and `freq` for that document,
  # sorted by descending frequency and cut off after 200 rows.
  doc_terms <- as.matrix(t(dtm[row,]))
  term_table <- rownames_to_column(data.frame(doc_terms))
  names(term_table) <- c('term', 'freq')
  ranked_terms <- arrange(term_table, -freq)
  # The wordcloud function struggles with too-large data frames (and so does
  # the laptop), so only the top 200 terms are kept
  head(ranked_terms, n = 200)
}
format_post <- function(data) {
  # Builds an HTML link to a single forum post, labelled "#<post_id>".
  #
  # Args:
  #   data: a named vector (typically one row handed over by apply(df, 1, ...))
  #     containing `thread_id` and `post_id`.
  #
  # Returns:
  #   A character string holding the <a> element.
  #
  # apply() turns the data frame into a character matrix, and numeric columns
  # are run through format() on the way, which left-pads shorter IDs with
  # spaces (" 5820"). Trim that padding so it ends up in neither the URL nor
  # the link text.
  thread_id <- trimws(data['thread_id'])
  post_id <- trimws(data['post_id'])
  paste0(
    '<a href="https://forums.thousandroads.net/index.php?threads/', thread_id,
    '/post-', post_id, '">#', post_id, '</a>'
  )
}
format_thread <- function(data) {
  # Builds an HTML link to a forum thread, labelled with the thread's title.
  #
  # Args:
  #   data: a named vector (typically one row handed over by apply(df, 1, ...))
  #     containing `thread_id` and `thread_title`.
  #
  # Returns:
  #   A character string holding the <a> element.
  #
  # apply() formats numeric columns to a common width, so shorter thread IDs
  # arrive left-padded with spaces; trim them before building the URL. The
  # title is a character column and is passed through untouched.
  thread_id <- trimws(data['thread_id'])
  paste0(
    '<a href="https://forums.thousandroads.net/index.php?threads/', thread_id,
    '">', data['thread_title'], '</a>'
  )
}
format_username <- function(data) {
  # Builds an HTML link to a member's profile, labelled with their username.
  #
  # Args:
  #   data: a named vector (typically one row handed over by apply(df, 1, ...))
  #     containing `user_id` and `username`.
  #
  # Returns:
  #   A character string holding the <a> element.
  #
  # apply() formats numeric columns to a common width, so shorter user IDs
  # arrive left-padded with spaces; trim them before building the URL. The
  # username is a character column and is passed through untouched.
  user_id <- trimws(data['user_id'])
  paste0(
    '<a href="https://forums.thousandroads.net/index.php?members/', user_id,
    '">', data['username'], '</a>'
  )
}
get_highest_poster <- function(data) {
  # Base R has no ready-made statistical "mode", so this stand-in exists for
  # use inside summarize(). It tabulates `data`, orders the tallies from most
  # to least frequent, and returns the label of the front-runner as a
  # character string.
  tally <- table(data)
  ranked <- sort(tally, decreasing = TRUE)
  names(ranked[1])
}
user_bar_plot <- function(data, stratifier, xvar = 'username') {
  # Draws a flipped (horizontal) bar chart with one bar per value of `xvar`,
  # bar length given by `stratifier`, bars ordered by `stratifier`, and each
  # bar labelled with its value.
  #
  # Args:
  #   data: data frame containing the columns named by `stratifier` and `xvar`.
  #   stratifier: name (string) of the column to plot and to order the bars by.
  #   xvar: name (string) of the column that identifies each bar.
  #
  # Returns:
  #   A ggplot object; callers add titles and axis labels with `+`.

  # Column names arrive as strings, so turn them into symbols that can be
  # unquoted with !! inside aes()
  value_sym <- sym(stratifier)
  group_sym <- sym(xvar)

  ggplot(data, aes(x = reorder(!!group_sym, !!value_sym), y = !!value_sym)) +
    geom_col(fill = '#240440') +
    # aes_string() is deprecated and does no quasiquotation of its own (which
    # is why `!!` was rejected inside it); plain aes() handles the unquoted
    # symbol exactly like the mapping above
    geom_text(aes(label = !!value_sym), hjust = -0.15) +
    scale_y_continuous(expand = expansion(mult = c(0, 0.1))) +
    coord_flip() +
    theme_minimal() +
    xlab('')
}
# Load full post dataset
# NOTE(review): read_tsv() applies its default quote handling here. Several
# posts in the rendered tables appear without their opening quotation mark,
# which may be a side effect of that -- consider `quote = ""` after checking
# the raw file.
posts <- read_tsv(
  'blacklight_posts.txt',
  col_names = c('post_id', 'user_id', 'thread_title', 'thread_id', 'username', 'post')
)

# Clean up our text a little. Each step below overwrites `clean_post`, so the
# order of the assignments matters.
cleaned_posts <- posts %>%
  mutate(
    # Zap square brackets *specifically* at the start/end of lines
    # in an attempt to rescue group comm posts that otherwise get reduced to
    # an empty string once BBCode is stripped
    clean_post = if_else(
      str_replace_all(post, '\\[[^\\]]*\\]', '') == "",
      # Delete leading/trailing square brackets
      substr(post, 2, nchar(post) - 1),
      post
    ),
    # Zap BBCode
    clean_post = str_replace_all(clean_post, '\\[[^\\]]*\\]', ''),
    # Force lowercase
    clean_post = tolower(clean_post),
    # Lazy regularization: remove 's
    clean_post = str_replace_all(clean_post, "'s\\b", ''),
    # Em dashes would get deleted with the rest of the punctuation, resulting
    # in words getting smooshed together; turn them into spaces instead
    clean_post = str_replace_all(clean_post, "--", ' '),
    clean_post = str_replace_all(clean_post, "—", ' '),
    # Ellipses can also cause this problem
    clean_post = str_replace_all(clean_post, "\\.{2,}", ' '),
    # Annihilate scene breaks
    clean_post = str_replace_all(clean_post, "[\\<\\>]", ' '),
    # Delete punctuation
    clean_post = str_replace_all(clean_post, '[:punct:]', ''),
    # Transform runs of spaces into single spaces
    clean_post = str_replace_all(clean_post, '[:space:]{2,}', ' '),
    clean_post = str_trim(clean_post),
    # Count words based on space boundaries. str_split() and str_count() are
    # already vectorised over posts, so no rowwise() pass is needed; lengths()
    # gives the per-post piece count. An empty clean_post still counts as one
    # "word" because splitting "" yields a single empty piece.
    wordcount = lengths(str_split(clean_post, '\\s+')),
    hopecount = str_count(clean_post, '\\bhope[s]?\\b'),
    hope_idx = hopecount / wordcount
  )
# Further processing for word-cloud-ing
# For whatever reason tm_map's implementation of the stop word removal function takes so long
# it isn't practical even to look at the full corpus for some of the top posters!
# It must be doing something fancy, because my stupid regex version here handles it in a relative eyeblink
#
#
# # Make the whole of the RP into one big, long string
# fulltext <- paste(cleaned_posts$clean_post, collapse = ' ') %>%
# # Eliminate numbers (e.g. damage reports)
# str_replace_all('[:digit:]', '')
#
# # hrrk, can't brain
# # can't think of how to map this
# # use a terrible loop instead...
# fulltext_clean <- fulltext
# for (i in 1:length(stopwords('english'))) {
# fulltext_clean <- str_replace_all(fulltext_clean, paste0('\\b', stopwords('english')[i], '\\b'), '')
# }
#
# # Do it again for character names
# for (i in 1:length(blc_characters)) {
# fulltext_clean <- str_replace_all(fulltext_clean, paste0('\\b', blc_characters[i], '\\b'), '')
# }
#
# # And finally, get rid of uninteresting, common words that float to the top
# # Let the record show that the actual most-used non-stop word in BLC was "like"
# nuisance_words <- c(
# 'like',
# 'just',
# 'said'
# )
#
# for (i in 1:length(nuisance_words)) {
# fulltext_clean <- str_replace_all(fulltext_clean, paste0('\\b', nuisance_words[i], '\\b'), '')
# }
#
# # Zap spaces introduced by the above steps
# fulltext_clean <- str_replace_all(fulltext_clean, '[:space:]{2,}', ' ')
#
# BLCorpus <- Corpus(VectorSource(fulltext_clean))
#
# # Do some stemming
# BLCorpus_cleaned <- BLCorpus %>% tm_map(stemDocument)
#
# # Create the term document matrix
# blc_tdm <- TermDocumentMatrix(BLCorpus_cleaned) %>%
# as.matrix()
#
# # Get word frequencies and construct wordcloud
# word_freqs <- sort(rowSums(blc_tdm),decreasing=TRUE)
#
# word_freqs_frame <- data.frame(word = names(word_freqs),freq=word_freqs)
#
# wordcloud(words = word_freqs_frame$word, freq = word_freqs_frame$freq, min.freq = 1,
# max.words=200, random.order=FALSE, rot.per=0.35,
# colors=rev(brewer.pal(8, "PuOr")))
# These summaries are kept as standalone objects because the in-text
# references use them directly

# One row per user with their total number of posts, most prolific first
post_counts <- posts %>%
  add_count(username) %>%
  arrange(desc(n)) %>%
  distinct(user_id, username, n)

# Total words written per user, wordiest first
word_counts <- cleaned_posts %>%
  group_by(username) %>%
  summarise(
    user_id = max(user_id),
    words = sum(wordcount)
  ) %>%
  arrange(desc(words))

# Per-thread totals: post count, word count, words per post (wpp), and the
# username / user_id that occurs most often in the thread
thread_summary <- cleaned_posts %>%
  group_by(thread_id) %>%
  summarise(
    thread_title = max(thread_title),
    count = n(),
    words = sum(wordcount),
    wpp = words / count,
    username = get_highest_poster(username),
    user_id = get_highest_poster(user_id)
  ) %>%
  arrange(desc(count))
Blacklight was an extended roleplaying campaign that was first announced July 17, 2020 and which finally concluded 500 days later on November 29, 2021. With this document I'm going to take a look at some of the stats surrounding the RP--how many posts were made, who made the most of them, and so on and so forth.
For this analysis I'm looking at all non-sticky threads in the Blacklight Campaign forum and all its sub-forums. I wanted to focus on actual posts with role-playing, so I cut out, e.g., the ~3k posts in the "Official Actions" thread, which are mostly just people claiming points and redoing stats. I've also removed the Art & Doodles and Prize Volunteers threads, which aren't actually RP content. They both also contain posts from people who didn't actually participate in the RP, which gums everything up! Deleted posts were also excluded.
With all that said, let's take a look at the results!
When all was said and done, the RP ran for 25837 posts and 3014474 words! A total of 44 users contributed to the RP. Unsurprisingly, the most prolific poster was Namohysip, with 5780 posts, or 22.4% of the total. In second place was unrepentantAuthor with 2811, 10.9% of the total. The full breakdown of posts by user is below.
# Bar chart of total post count per user
user_bar_plot(post_counts, 'n') +
  labs(
    title = 'Blacklight Posts Per User',
    y = 'Total Posts'
  )
The word count per user looks a little bit different! Unsurprisingly, the first couple slots are the same, as the sheer number of posts by the most prolific players ensure that even if they had shorter posts than the norm (rarely!) they were nevertheless putting in the most words overall. However, there are more shake-ups further down the list. Cresselia92 had the ninth-highest number of posts but jumped to 3rd place in terms of word count--slightly fewer posts, but with a lot of effort put into them! Spiteful Murkrow didn't crack the top ten word counts, but had the biggest discrepancy between post count and word count out of anyone, climbing nine places in the rankings. If he'd joined the RP at its outset, Kate might have ended up with one of the longest stories of any character!
# Bar chart of total word count per user
user_bar_plot(word_counts, 'words') +
  labs(
    title = 'Total Words Per User',
    y = 'Total Words'
  )
In addition to total words written, looking at the median words written per player gives a sense of their preferred RP style. Whether it's short and snappy back and forth or long posts full of description and introspection, there was a great deal of variety in what sort of posts different players gravitated towards. This is one of few metrics where Namohysip doesn't land on top by a mile! I'm tickled by how many ties we see here, too.
# Median post length (in words) for each player, as a bar chart
cleaned_posts %>%
  group_by(username) %>%
  summarise(med_words = median(wordcount)) %>%
  user_bar_plot('med_words') +
  labs(
    title = 'Median Words Per Post, by Player',
    y = 'Median Words Per Post'
  )
We can also take a look at the overall distribution of post length to get a sense of the RP's general vibe.
The mean word count per post was 116.7, and the median was 86, indicating that the typical RP post was roughly a paragraph in length. If people deviated from that, they were more likely to go shorter than longer. However, there were a few cases where people broke out truly epic-length replies, and these dragged the mean word count a little towards the right. Overall, Blacklight wasn't a "casual" RP, but neither was it one where players are expected to put in multiple paragraphs per reply, as is often the case for forum-based games.
# Histogram of words per post; the x axis is limited to 0-1000 words, as the
# caption explains
ggplot(cleaned_posts, aes(x = wordcount)) +
  geom_histogram(bins = 100, fill = '#240440') +
  scale_x_continuous(
    limits = c(0, 1000),
    breaks = seq(0, 1000, by = 100),
    expand = expansion(mult = c(0, 0.1))
  ) +
  scale_y_continuous(expand = expansion(mult = c(0, 0.1))) +
  theme_minimal() +
  ggtitle('Overall Distribution of Words Per Post') +
  xlab('Words Per Post') +
  ylab('Count') +
  labs(caption = 'Sixty-four posts of more than 1000 words have been excluded in the interest of readability.')
So what were those ultralong posts? Let's take a look. Here are the top ten longest posts in the RP:
# The formatting helpers work on one row at a time, hence the repeated
# apply() calls below -- the price of a lazy formatter design

# Ten posts with the highest word counts
longest_posts <- head(arrange(cleaned_posts, -wordcount), n = 10)

# Render them as an HTML table; escape = FALSE keeps the generated links live
kable_styling(
  kable(
    data.frame(
      Post = apply(longest_posts, 1, format_post),
      Thread = apply(longest_posts, 1, format_thread),
      Player = apply(longest_posts, 1, format_username),
      Words = longest_posts$wordcount
    ),
    'html',
    escape = FALSE
  )
)
Post | Thread | Player | Words |
---|---|---|---|
#49459 | Rocky Road ~ House of Starr and Nate | Negrek | 3919 |
#28294 | Poppin' Arms ~ Dorm Lounge | Cresselia92 | 3370 |
#39214 | [Support] Other Storms | DragonD | 2728 |
#39288 | [Support] Other Storms | Negrek | 2704 |
#49558 | Rocky Road ~ House of Dill, Saltriv, Shiron, and Vix | Fusion | 2631 |
#47646 | Null Realm ~ Blacklight Eternity | Namohysip | 2610 |
#36850 | Nil Plateaus | Fusion | 2547 |
#37368 | Destiny Colosseum | Namohysip | 2469 |
#41597 | The Moon | Namohysip | 2445 |
#38753 | [Support] Other Storms | RJR Basimilus | 2108 |
The all-time longest post in the RP contains nearly 4,000 words and is over 500 words longer than the next-longest on the list. Perhaps someone got confused and thought they were writing a fanfic chapter instead of an RP post; not sure what's up with that. Overall Namohysip contributed the most of the top ten longest posts, with three, while Negrek and Fusion had two each. The "Other Storms" thread, intended for Support players to write scenes with their familiars while Front-line players fought them in individual threads, had the most of these longest posts. Front-line players probably ended up writing as many or more words over the course of their respective scenes during the boss fight, but as most Support players condensed their entire character progression into a single post without DM intervention, they ended up with much higher single-post word counts.
On the other end of the spectrum, let's take a look at the RP's shortest posts. Excluding posts that were solely an emoji, image, or scene break, here are all the single-word posts made over the course of the RP:
# All posts that count as a single word, in posting order
single_word_posts <- cleaned_posts %>%
  # We have to do some fancy filtering to remove images and scene breaks
  # I've also removed Group Chat posts that were a single word outside of the address
  # (e.g. [Soda > Team Spectrum])
  # How many words they count as is arguable, and this way is easier
  filter(wordcount == 1 & !grepl('>', post) & !grepl('ATTACH', post) & !grepl('img', post) & !grepl(':', post)) %>%
  # For fun, reformat some of the BBCode as HTML. str_replace_all() is used so
  # that every tag in a post is converted, not just the first of each kind
  mutate(
    post = str_replace_all(post, '\\[I\\]', '<i>'),
    post = str_replace_all(post, '\\[/I\\]', '</i>'),
    post = str_replace_all(post, '\\[B\\]', '<b>'),
    post = str_replace_all(post, '\\[/B\\]', '</b>')
  ) %>%
  arrange(post_id)

# Render as an HTML table; escape = FALSE keeps the links and the converted
# <i>/<b> tags live
data.frame(
  Post = apply(single_word_posts, 1, format_post),
  Thread = apply(single_word_posts, 1, format_thread),
  Player = apply(single_word_posts, 1, format_username),
  Content = single_word_posts$post
) %>%
  kable('html', escape = FALSE) %>%
  kable_styling()
Post | Thread | Player | Content |
---|---|---|---|
# 5820 | Treasure Town ~ Marowak Dojo | Namohysip | Later. |
#12314 | Group Communication | TheGOAT | Wuh? |
#14593 | Poppin' Arms Guild ~ Main Lobby | Ambyssin | “Delightful.” |
#15695 | Poppin' Arms ~ Large Dorms | Shiny Phantump | “yes...” |
#31689 | Mystery Continent ~ Northern Pines | Namohysip | ...What. |
#34464 | Poppin' Arms Guild ~ Main Lobby | IFBench | Oh." |
#34677 | Mystery Continent ~ Northern Pines | Ambyssin | “Elaborate.” |
#34900 | Group Communication | Ambyssin | ... idiot. |
#35087 | Group Communication | Fusion | ...Ew. |
#36511 | Spirits' Edge | Namohysip | Creation. |
#41899 | Lush Prairie | unrepentantAuthor | Joule." |
#43749 | Destiny Colosseum | unrepentantAuthor | "Shit." |
#47028 | Destiny Village ~ Parfait Way | Magyk | "WHAT?!" |
#47142 | Destiny Village ~ Parfait Way | Namohysip | ...Brisa." |
King of the single-word posts was Namohysip with four, followed by Ambyssin with three. The Group Communication thread had the most single-word posts, although it only contributed three of the fourteen. This thread contained a lot of short, quippy posts in general, so it's no surprise to see it showing up here. But funnily enough, none of Namohysip's single-word posts were in that thread!
Let's take a look at threads next. The RP consisted of 99 different threads, including the special Eterna Storm threads that housed one player scene each. Here are the most popular threads in the RP, based on total number of posts:
# Ten threads with the most posts (thread_summary is already sorted by post
# count), rendered as an HTML table
thread_summary %>%
  head(n = 10) %>%
  {data.frame(
    Thread = apply(., 1, format_thread),
    Posts = .$count,
    Words = .$words,
    `Player with Most Posts` = apply(., 1, format_username)
  )} %>%
  kable(
    'html',
    # Column headers are supplied here, overriding the data frame's own names
    col.names = c('Thread', 'Posts', 'Words', 'Player with Most Posts'),
    escape = FALSE
  ) %>%
  kable_styling()
Thread | Posts | Words | Player with Most Posts |
---|---|---|---|
Grass Continent ~ Treasure Town Beach | 1213 | 115976 | Adamhuarts |
Destiny Village ~ Parfait Way | 1162 | 107786 | Namohysip |
Destiny Village ~ Central Crossroads | 1159 | 116458 | Namohysip |
Destiny Colosseum | 1141 | 181279 | Namohysip |
Destiny Village ~ X-Eye Cauldron | 874 | 91652 | Dragonfree |
Mystery Continent ~ Northern Pines | 846 | 95335 | Namohysip |
Poppin' Arms Guild ~ Main Lobby | 797 | 97993 | Namohysip |
Destiny Colosseum ~ Training Rooms | 785 | 89189 | unrepentantAuthor |
Destiny Village ~ Sundae Park | 759 | 75576 | Namohysip |
Destiny Tower | 744 | 80672 | Namohysip |
Unsurprisingly, Namohysip contributed the greatest number of posts to the majority of these (and all) threads. However, we can see some fun character traits shining through in the most common posters in the other threads. Grass Continent ~ Treasure Town Beach is unusual to begin with, as not only the longest thread but also the only among the top ten that isn't in Destiny Village, and it turns out that Cynthian's its biggest visitor! Similarly, Brisa was the character to make the most use of the Destiny Colosseum ~ Training Rooms thread, while Dave was Destiny Village ~ X-Eye Cauldron's biggest customer!
The shortest threads aren't terribly interesting; unsurprisingly, these are mostly made up of character-specific Eterna Storm threads, which by their nature tend to be short. The shortest thread that represents an actual location as such is Void Core, with 17 posts, although even that thread was somewhat restricted in how many/how often players could access it.
We can also look at the threads with the meatiest posts.
# Ten threads with the highest words-per-post figure, rendered as an HTML table
head(arrange(thread_summary, -wpp), n = 10) %>%
  {data.frame(
    Thread = apply(., 1, format_thread),
    wpp = round(.$wpp, digits = 1),
    `Player with Most Posts` = apply(., 1, format_username)
  )} %>%
  kable(
    'html',
    # Column headers are supplied here, overriding the data frame's own names
    col.names = c('Thread', 'Words Per Post', 'Player with Most Posts'),
    escape = FALSE
  ) %>%
  kable_styling()
Thread | Words Per Post | Player with Most Posts |
---|---|---|
[Support] Other Storms | 1577.8 | Negrek |
[Owen] Hot Spot Cave | 540.6 | Namohysip |
[Icetales] Core of Chaos | 302.8 | Cresselia92 |
[Vix] Grand Arena | 295.6 | Fusion |
The Moon | 287.6 | Namohysip |
Radiant Void | 278.2 | Namohysip |
Nil Plateaus | 250.0 | Fusion |
Worldcore | 246.3 | Namohysip |
Null Realm ~ Blacklight Eternity | 233.0 | IFBench |
[Koa] Strange Building | 221.2 | Flyg0n |
A full half of these threads are associated with the Eterna Storm boss battle, and particularly interesting to me are those belonging to Namohysip and Flyg0n, who didn't have particularly high word counts per post overall but who ended up in the top ten with their respective Eterna Storm threads. Clearly people pulled out all the stops for that event!
In fact, with the sole exception of Nil Plateaus, these are all boss battle threads; it probably isn't surprising that people put out some of their longest posts for those, Namohysip almost certainly among them! It's also appropriate to see the very last battle of the RP, Null Realm ~ Blacklight Eternity, appearing among the top ten. Things certainly went out with a bang!
Once again, considering the threads with the lowest number of words per post isn't tremendously interesting. Group Communication lands square on the bottom as expected, at a whopping 37.5 words per post.
We've looked at a number of broad-strokes metrics relating to how many words are present in posts or threads. Let's now zoom in a bit and consider what words are showing up. In particular, because hope is a major theme of the RP, we can see how often it comes up.
In the end, "hope" or "hopes" was used 1228 times over the course of the RP, or roughly once per 2454.8 words. We can also take a look at the RP's various players and construct for them a "hope index," a measure of how many times they use "hope" or "hopes" per 1000 words.
# Per-player "hope index": uses of "hope"/"hopes" per 1000 words, rounded to
# two decimals, shown as a bar chart
cleaned_posts %>%
  group_by(username) %>%
  summarise(hope_idx = round(sum(hopecount)/sum(wordcount) * 1000, digits = 2)) %>%
  user_bar_plot('hope_idx') +
  labs(
    title = 'Hope Index by Player',
    y = '"Hope" Per 1000 Words'
  )
I find the results here pretty surprising! To some extent, this measure favors players who tend to make shorter posts, although Cresselia92 has a pretty respectable hope index, despite putting in a lot of words in general. And it perhaps isn't surprising that Namohysip doesn't appear higher on the list, as he was playing all the NPCs, some of whom aren't especially hopeful. I'd be curious to see how his ranking might change if this analysis were limited to Owen posts! Finally, Ambyssin's top-half placement is interesting to me. Bahamut is a rather notoriously unhopeful character, but one who perhaps talked about it more often than many others--fixated on it in a negative way, perhaps. Unfortunately this analysis doesn't look at any of the context around the words involved, so a character dismissing hope reads as "hopeful" as one embracing it.
We can also look at the most hopeful posts as well as the most hopeful players:
# Ten posts with the highest per-post hope index (hopecount / wordcount)
most_hopeful_posts <- cleaned_posts %>%
  arrange(-hope_idx) %>%
  head(n = 10) %>%
  mutate(
    # Convert italic BBCode to HTML. str_replace_all() is used so that every
    # italic span in a post is converted, not just the first one
    post = str_replace_all(post, '\\[I\\]', '<i>'),
    post = str_replace_all(post, '\\[/I\\]', '</i>'),
    # NOTE(review): this swaps only the first '>' in the post for '<'; the
    # intent isn't evident from here, so it is left exactly as it was
    post = str_replace(post, '\\>', '<')
  )

# Render as an HTML table; escape = FALSE keeps the links and <i> tags live
data.frame(
  Post = apply(most_hopeful_posts, 1, format_post),
  Thread = apply(most_hopeful_posts, 1, format_thread),
  Player = apply(most_hopeful_posts, 1, format_username),
  Content = most_hopeful_posts$post
) %>%
  kable('html', escape = FALSE) %>%
  kable_styling()
Post | Thread | Player | Content |
---|---|---|---|
#47185 | Rocky Road ~ House of Dill, Saltriv, Shiron, and Vix | IFBench | Hope that I'll be able to help. Hope that I can save others. Hope that I can make things better," Saltriv answered, smiling brightly. |
#15834 | Poppin' Arms Guild ~ Infirmary | Persephone | Oh! Yeah, I'll take as many as you want to give. Thank you!" Still hope. Still. Hope. |
#21487 | Group Communication | Virgil134 | |
#47915 | Destiny Village ~ Central Crossroads | Ambyssin | Bahamut smirked. "Fortunately for you, hope's a big thing here." |
#45780 | Rocky Road ~ House of Diyem, Eien, Mellow, and Toast | IFBench | Saltriv's eyes widened, and a glimmer of hope sparkled within them. There was hope still yet. "Thank you!" they exclaimed. |
#45366 | Poppin' Arms Guild ~ Main Lobby | unrepentantAuthor | Brisa winced, and looked to Diyem in hopes of being wrong. |
#45777 | Rocky Road ~ House of Diyem, Eien, Mellow, and Toast | Sudmensch | Dill looked back up, blinking as his hopes pricked back up. |
#32755 | Rocky Road ~ House of Brisa, Dave, Koa, and Owen | Dragonfree | Dave raised an eyebrow. "Therapy? I it's not with fucking Bean." |
#42307 | Destiny Village ~ Parfait Way | Ambyssin | “You don’t actually turn Blacklight type.” Nero shook his head. “At least, I hope not.” |
#33299 | Rocky Road ~ House of Dill, Saltriv, Shiron, and Vix | Navar | I'm Shiron!" he waved and smiled. "Nice to meet you. Hope we can be friends!" |
Taking a look at the results reveals some of the weaknesses of this analysis. It favors short posts that happen to have hope mentioned as little as once, rather than posts that evoke it multiple times (more on that later), and it's oblivious to context--not all of the uses here are addressing "hope" as a concept, and not all of them are necessarily positive about hope, which is maybe contrary to the themes of the RP.
Nevertheless, I think IFBench's "most hopeful post" here does capture the spirit of the RP perfectly. What's more Blacklight than "Hope that I'll be able to help. Hope that I can save others. Hope that I can make things better"?
Let's take one last, brief look at those posts that have the highest total uses of the words "hope" or "hopes," regardless of their length:
# Ten posts with the largest raw hope count, regardless of post length.
# Ties on hopecount are broken in favour of the higher post_id.
most_hopes_posts <- head(
  arrange(cleaned_posts, desc(hopecount), desc(post_id)),
  n = 10
)
# Render the top posts by hope count as a styled HTML table. Each row of
# most_hopes_posts is passed through the format_* helpers (defined elsewhere
# in this file). The hopenum column is given a friendlier header through
# col.names, and escaping is off so HTML in the cells is emitted as-is.
kable_styling(
  kable(
    data.frame(
      Post = apply(most_hopes_posts, MARGIN = 1, FUN = format_post),
      Thread = apply(most_hopes_posts, MARGIN = 1, FUN = format_thread),
      Player = apply(most_hopes_posts, MARGIN = 1, FUN = format_username),
      hopenum = most_hopes_posts[['hopecount']]
    ),
    format = 'html',
    escape = FALSE,
    col.names = c('Post', 'Thread', 'Player', 'Number of Hopes')
  )
)
Here Cresselia92's hopeful posting really shines through! She has the lion's share of the top ten posts by hope count, as well as the most-hopeful post by count overall, which if you click through and read it is definitely embracing Blacklight's themes! All in all there were 10 posts in the RP that used "hope" or "hopes" more than once. Cresselia92 had the greatest number of these, at 4, with Namohysip right behind at 3. Although she didn't necessarily top the list on all of the hope-related stats, she consistently placed high and clearly created a character that strongly resonated with the RP's themes.
As a fun quirk of the data, all of the "hopes" in Namohysip's Offscreen Activities post actually came from Negrek's Offscreen Activities post: they were simply contained in quotes. Another limitation of this analysis!
Finally, I'm going to break things out for each player individually, both so people can look up their numbers in a single, convenient place, and to add a couple new analyses, too!
Most notably, for players with at least 100 posts, I've included a word cloud that shows that player's most-used distinctive words. Unfortunately, the word cloud generator kept exploding for people with fewer than 100 posts, so I've had to exclude that analysis for them.
A user's most distinctive words aren't necessarily the ones they use most often, but they are the ones they used most often relative to other players in the RP. In general, everybody's actual most common words are things like, well, "like," "said," etc. One side-effect of this is that proper names tend to show up really strongly--in general, nobody mentioned a player character's name more than their own player! I've therefore filtered out the names of player characters and familiars in order to try and let more interesting words rise to the top. It was kind of fun to see clouds essentially "labelled" with a great big "CHARACTER NAME" plopped in the middle of each cloud, but it made it hard to read the more-interesting words that were pushed to the margins.
In that vein, I don't know what's going on with TheGOAT's cloud, where the usual name-cleaning function seems to have failed. Somehow he used a lot of words that got stemmed down to "astrid" but didn't exactly match "astrid." Fortunately the clean-up worked at least a little, since "astrid" isn't nearly as dominant in that cloud as it would be if most instances of her name hadn't gotten zapped, but in general I have no idea what's going on in that cloud, and I blame TheGOAT. What even is that... meteorite... emoji... thing?
# We basically want a single map + function call to programmatically generate
# a whole bunch of stats for individual players.
#
# display_player() writes one player's section of the report: a heading and
# summary stats (via cat), a histogram of words per post (via print), the
# thread they posted in most, their hope count, and -- for players with at
# least 100 posts -- a word cloud of their most distinctive words.
#
# Arguments:
#   data       Data frame of posts. This function reads the username,
#              wordcount, hopecount and thread_id columns, and hands single
#              rows to format_post() / format_thread().
#   word_freqs List indexed by username; each element supplies `term` and
#              `freq` for the word cloud.
#   user       Username of the player to report on.
#
# Called for its side effects (text written with cat, plots drawn).
# NOTE(review): the raw markdown/HTML emitted by cat() presumably relies on
# the enclosing chunk using results = 'asis' -- confirm.
display_player <- function(data, word_freqs, user) {
  player_posts <- data %>% filter(username == user)

  total_words <- sum(player_posts$wordcount)
  total_hopes <- sum(player_posts$hopecount)

  # Sort and count once each, then reuse; previously each of these was
  # recomputed for every value pulled out of it.
  longest_post <- arrange(player_posts, desc(wordcount))[1, ]
  busiest_thread <- arrange(add_count(player_posts, thread_id), desc(n))[1, ]

  # Avoid printing "NaN per 1000 words" for a player whose posts contain no
  # words at all (0 / 0).
  hopes_per_thousand <- if (isTRUE(total_words == 0)) {
    0
  } else {
    round(total_hopes * 1000 / total_words, digits = 2)
  }

  words_histogram <- player_posts %>%
    ggplot(aes(x = wordcount)) +
    geom_histogram(fill = '#240440') +
    # No padding below the bars, a little headroom above them
    scale_y_continuous(expand = expansion(mult = c(0, 0.1))) +
    theme_minimal() +
    ggtitle(paste('Distribution of Words Per Post for', user)) +
    xlab('Words Per Post') +
    ylab('Count')

  cat(paste0(
    '<h3>', user,
    '</h3>\n**Number of Posts:** ', nrow(player_posts),
    '\n\n**Total Words:** ', total_words,
    ' (median ', median(player_posts$wordcount), ' words per post)',
    '\n\n**Longest Post:** ', format_post(longest_post),
    ' (', longest_post[['wordcount']], ' words)',
    '\n\n'
  ))

  print(words_histogram)

  cat(paste0(
    '\n\n**Posted Most In:** ', format_thread(busiest_thread),
    ' (', busiest_thread[['n']], ' posts)',
    '\n\n**Number of Hopes:** ', total_hopes,
    ' (', hopes_per_thousand, ' per 1000 words)',
    '\n\n'
  ))

  # Wrap up with a word cloud!
  # ...if the user has enough posts
  # only then
  if (nrow(player_posts) >= 100) {
    cat(paste0('<h4>Most Distinct Words for ', user, '</h4>'))
    wordcloud(
      words = word_freqs[[user]]$term, freq = word_freqs[[user]]$freq,
      max.words = 200, random.order = FALSE, rot.per = 0.35,
      colors = rev(brewer.pal(8, "Dark2"))
    )
  }

  # Horizontal rule between players
  cat('\n\n*****\n\n')
}
# We're going to make word clouds for everybody!
# In order to make TF-IDF go
# (and trust me, TF-IDF produces *far* more amusing results than straight term frequency)
# we need to create a corpus where each document is the sum total of a player's blacklight posts
# When we reach their individual print step, we'll then fetch the info on their doc specifically from the results
distinct_users <- sort(distinct(cleaned_posts, username)$username)

# Create the user lookup that we really should have been using since the very beginning, honestly
# Maps username -> position in distinct_users, which is also that player's
# document (row) index in the document-term matrix built below.
# seq_along() instead of seq(1, length(...)): the latter counts down to
# c(1, 0) when there are no users.
user_lookup <- as.list(setNames(seq_along(distinct_users), distinct_users))

# One document per player, in the same order as distinct_users
user_docs <- map_chr(distinct_users, ~ create_user_doc(cleaned_posts, .x))
BLCorpus <- Corpus(VectorSource(user_docs))

# Do some stemming
BLCorpus_cleaned <- BLCorpus %>% tm_map(stemDocument)

# TF-IDF weighting, so terms are scored relative to the other players' documents
dtm <- DocumentTermMatrix(BLCorpus_cleaned, control = list(weighting = weightTfIdf))

# Term/frequency table for each player, indexed by username
user_word_freqs <- setNames(
  map(distinct_users, ~ extract_freqs(dtm, user_lookup[[.x]])),
  distinct_users
)

# Print every player's section, in alphabetical order of username
walk(distinct_users, ~ display_player(cleaned_posts, user_word_freqs, .x))
Number of Posts: 1304
Total Words: 123652 (median 78 words per post)
Longest Post: #6338 (795 words)
Posted Most In: Grass Continent ~ Treasure Town Beach (150 posts)
Number of Hopes: 75 (0.61 per 1000 words)
Number of Posts: 2085
Total Words: 145310 (median 49 words per post)
Longest Post: #25503 (1207 words)
Posted Most In: Destiny Village ~ Parfait Way (184 posts)
Number of Hopes: 75 (0.52 per 1000 words)
Number of Posts: 18
Total Words: 382 (median 19.5 words per post)
Longest Post: #26173 (65 words)
Posted Most In: Poppin' Arms Guild ~ Infirmary (9 posts)
Number of Hopes: 0 (0 per 1000 words)
Number of Posts: 1099
Total Words: 128155 (median 97 words per post)
Longest Post: #36996 (946 words)
Posted Most In: Destiny Colosseum (130 posts)
Number of Hopes: 27 (0.21 per 1000 words)
Number of Posts: 1047
Total Words: 243309 (median 181 words per post)
Longest Post: #28294 (3370 words)
Posted Most In: Poppin' Arms ~ Dorm Lounge (93 posts)
Number of Hopes: 163 (0.67 per 1000 words)
Number of Posts: 2
Total Words: 290 (median 145 words per post)
Longest Post: #5136 (235 words)
Posted Most In: [Inaccessible] Veritas City (1 posts)
Number of Hopes: 0 (0 per 1000 words)
Number of Posts: 621
Total Words: 36905 (median 47 words per post)
Longest Post: #9915 (337 words)
Posted Most In: Tree of Life ~ Exterior (84 posts)
Number of Hopes: 14 (0.38 per 1000 words)
Number of Posts: 55
Total Words: 18694 (median 211 words per post)
Longest Post: #39214 (2728 words)
Posted Most In: Shaymin Village (19 posts)
Number of Hopes: 4 (0.21 per 1000 words)
Number of Posts: 1503
Total Words: 146268 (median 76 words per post)
Longest Post: #19454 (1490 words)
Posted Most In: Destiny Village ~ X-Eye Cauldron (171 posts)
Number of Hopes: 41 (0.28 per 1000 words)
Number of Posts: 56
Total Words: 5769 (median 80.5 words per post)
Longest Post: #5995 (330 words)
Posted Most In: Marowak Dojo ~ Living Quarters (19 posts)
Number of Hopes: 1 (0.17 per 1000 words)
Number of Posts: 204
Total Words: 15145 (median 66.5 words per post)
Longest Post: #43141 (285 words)
Posted Most In: Grass Continent ~ Treasure Town Beach (36 posts)
Number of Hopes: 6 (0.4 per 1000 words)
Number of Posts: 689
Total Words: 88248 (median 105 words per post)
Longest Post: #39367 (827 words)
Posted Most In: Rocky Road ~ House of Brisa, Dave, Koa, and Owen (115 posts)
Number of Hopes: 31 (0.35 per 1000 words)
Number of Posts: 1128
Total Words: 148128 (median 108 words per post)
Longest Post: #49558 (2631 words)
Posted Most In: Destiny Village ~ Central Crossroads (70 posts)
Number of Hopes: 41 (0.28 per 1000 words)
Number of Posts: 544
Total Words: 29020 (median 35 words per post)
Longest Post: #41340 (446 words)
Posted Most In: Rocky Road ~ House of Dill, Saltriv, Shiron, and Vix (104 posts)
Number of Hopes: 25 (0.86 per 1000 words)
Number of Posts: 98
Total Words: 3760 (median 21 words per post)
Longest Post: #5833 (353 words)
Posted Most In: Treasure Town ~ Main Square (26 posts)
Number of Hopes: 1 (0.27 per 1000 words)
Number of Posts: 2
Total Words: 191 (median 95.5 words per post)
Longest Post: #5145 (187 words)
Posted Most In: [Inaccessible] Veritas City (1 posts)
Number of Hopes: 0 (0 per 1000 words)
Number of Posts: 19
Total Words: 3619 (median 167 words per post)
Longest Post: #9456 (551 words)
Posted Most In: Treasure Town ~ Main Square (6 posts)
Number of Hopes: 4 (1.11 per 1000 words)
Number of Posts: 136
Total Words: 19650 (median 127.5 words per post)
Longest Post: #45911 (443 words)
Posted Most In: Destiny Village ~ X-Eye Cauldron (28 posts)
Number of Hopes: 5 (0.25 per 1000 words)
Number of Posts: 117
Total Words: 11964 (median 88 words per post)
Longest Post: #45480 (282 words)
Posted Most In: Destiny Village ~ Parfait Way (41 posts)
Number of Hopes: 4 (0.33 per 1000 words)
Number of Posts: 4
Total Words: 472 (median 101.5 words per post)
Longest Post: #7263 (213 words)
Posted Most In: [Inaccessible] Veritas City (4 posts)
Number of Hopes: 0 (0 per 1000 words)
Number of Posts: 5780
Total Words: 763743 (median 89 words per post)
Longest Post: #47646 (2610 words)
Posted Most In: Destiny Colosseum (399 posts)
Number of Hopes: 260 (0.34 per 1000 words)
Number of Posts: 284
Total Words: 19573 (median 51 words per post)
Longest Post: #25498 (662 words)
Posted Most In: Rocky Road ~ House of Dill, Saltriv, Shiron, and Vix (56 posts)
Number of Hopes: 22 (1.12 per 1000 words)
Number of Posts: 312
Total Words: 36200 (median 84 words per post)
Longest Post: #10307 (1044 words)
Posted Most In: Spinda Cafe (62 posts)
Number of Hopes: 10 (0.28 per 1000 words)
Number of Posts: 1443
Total Words: 233417 (median 129 words per post)
Longest Post: #49459 (3919 words)
Posted Most In: Destiny Village ~ Chip's Lab (124 posts)
Number of Hopes: 84 (0.36 per 1000 words)
Number of Posts: 44
Total Words: 2565 (median 48 words per post)
Longest Post: #32831 (170 words)
Posted Most In: Poppin' Arms ~ Dorm Lounge (14 posts)
Number of Hopes: 1 (0.39 per 1000 words)
Number of Posts: 347
Total Words: 21742 (median 50 words per post)
Longest Post: #26381 (632 words)
Posted Most In: Destiny Village ~ Sundae Park (41 posts)
Number of Hopes: 7 (0.32 per 1000 words)
Number of Posts: 123
Total Words: 28902 (median 211 words per post)
Longest Post: #29154 (1109 words)
Posted Most In: Tree of Life ~ Exterior (15 posts)
Number of Hopes: 13 (0.45 per 1000 words)
Number of Posts: 52
Total Words: 4606 (median 81.5 words per post)
Longest Post: #45736 (279 words)
Posted Most In: Destiny Village ~ X-Eye Cauldron (29 posts)
Number of Hopes: 3 (0.65 per 1000 words)
Number of Posts: 251
Total Words: 21914 (median 72 words per post)
Longest Post: #38753 (2108 words)
Posted Most In: Destiny Village ~ Sundae Park (44 posts)
Number of Hopes: 7 (0.32 per 1000 words)
Number of Posts: 39
Total Words: 4018 (median 100 words per post)
Longest Post: #41877 (213 words)
Posted Most In: Grass Continent ~ Treasure Town Beach (21 posts)
Number of Hopes: 2 (0.5 per 1000 words)
Number of Posts: 583
Total Words: 46887 (median 63 words per post)
Longest Post: #38721 (1449 words)
Posted Most In: Sharpedo Bluff (59 posts)
Number of Hopes: 26 (0.55 per 1000 words)
Number of Posts: 37
Total Words: 3486 (median 81 words per post)
Longest Post: #42169 (263 words)
Posted Most In: Mystery Continent ~ Northern Pines (23 posts)
Number of Hopes: 7 (2.01 per 1000 words)
Number of Posts: 133
Total Words: 8889 (median 53 words per post)
Longest Post: #8031 (291 words)
Posted Most In: Treasure Town ~ Marketplace (33 posts)
Number of Hopes: 2 (0.22 per 1000 words)
Number of Posts: 218
Total Words: 42377 (median 157 words per post)
Longest Post: #27665 (981 words)
Posted Most In: Rocky Road ~ House of Kate, Cabot, and Nip (34 posts)
Number of Hopes: 6 (0.14 per 1000 words)
Number of Posts: 227
Total Words: 8112 (median 29 words per post)
Longest Post: #49610 (165 words)
Posted Most In: Rocky Road ~ House of Dill, Saltriv, Shiron, and Vix (61 posts)
Number of Hopes: 9 (1.11 per 1000 words)
Number of Posts: 285
Total Words: 30147 (median 92 words per post)
Longest Post: #15513 (620 words)
Posted Most In: Treasure Town ~ Marowak Dojo (60 posts)
Number of Hopes: 9 (0.3 per 1000 words)
Number of Posts: 576
Total Words: 91571 (median 130.5 words per post)
Longest Post: #47308 (1882 words)
Posted Most In: Destiny Colosseum (96 posts)
Number of Hopes: 35 (0.38 per 1000 words)
Number of Posts: 18
Total Words: 1206 (median 69.5 words per post)
Longest Post: #43292 (176 words)
Posted Most In: Destiny Colosseum ~ Training Rooms (10 posts)
Number of Hopes: 1 (0.83 per 1000 words)
Number of Posts: 70
Total Words: 13853 (median 157 words per post)
Longest Post: #7520 (1089 words)
Posted Most In: Treasure Town ~ Main Square (17 posts)
Number of Hopes: 2 (0.14 per 1000 words)
Number of Posts: 113
Total Words: 13042 (median 95 words per post)
Longest Post: #18043 (482 words)
Posted Most In: Marowak Dojo ~ Living Quarters (21 posts)
Number of Hopes: 3 (0.23 per 1000 words)
Number of Posts: 31
Total Words: 741 (median 20 words per post)
Longest Post: #6566 (52 words)
Posted Most In: Treasure Town ~ Marowak Dojo (9 posts)
Number of Hopes: 0 (0 per 1000 words)
Number of Posts: 2811
Total Words: 330332 (median 97 words per post)
Longest Post: #12917 (1041 words)
Posted Most In: Destiny Colosseum (290 posts)
Number of Hopes: 164 (0.5 per 1000 words)
Number of Posts: 958
Total Words: 77360 (median 59 words per post)
Longest Post: #49147 (878 words)
Posted Most In: Grass Continent ~ Treasure Town Beach (121 posts)
Number of Hopes: 12 (0.16 per 1000 words)
Number of Posts: 371
Total Words: 40860 (median 98 words per post)
Longest Post: #25484 (390 words)
Posted Most In: Marowak Dojo ~ Living Quarters (58 posts)
Number of Hopes: 26 (0.64 per 1000 words)