clean_text_for_cloud <- function(string) {
  # Prepares text for word-cloud construction.
  #
  # Removes, in this order: digits, English stopwords, character names, and
  # a few very common filler words; then collapses the runs of whitespace
  # that the removals leave behind.
  #
  # string: character string (or vector) to clean. The removal terms are all
  #         lowercase and matching is case-sensitive, so the input is
  #         expected to be lowercased already.
  # Returns the cleaned text.
  #
  # Relies on str_replace_all() and stopwords() being attached by the
  # surrounding script (presumably stringr and tm -- TODO confirm).

  # Character names: if they aren't deleted, they clog up the word cloud
  blc_characters <- c(
    'araun', 'aria', 'astrid', 'bahamut', 'ben', 'burhalla', 'brisa',
    'cabot', 'cal', 'celeste', 'cino', 'coco', 'cuicatl', 'curio',
    'cynthian', 'dave', 'dill', 'drake', 'echo', 'esper', 'icetales',
    'inno', 'ivy', 'kate', 'koa', 'kora', 'lexx', 'luz', 'melanie',
    'mergo', 'mellow', 'mia', 'myles', 'nate', 'nero', 'nip', 'owen',
    'pana', 'petram', 'pleo', 'polaris', 'puddle', 'raven', 'rocky',
    'saltriv', 'scraggy', 'scrafty', 'seyka', 'shiron', 'skara', 'starr',
    'soul', 'tempest', 'tricky', 'vix', 'zane'
  )

  # Uninteresting, common words that float to the top
  # Let the record show that the actual most-used non-stop word in BLC was "like"
  nuisance_words <- c('like', 'just', 'said')

  # stopwords() is evaluated once here instead of twice per loop iteration.
  # The concatenation order keeps the original removal order:
  # stopwords, then character names, then nuisance words.
  removal_terms <- c(stopwords('english'), blc_characters, nuisance_words)

  clean_string <- str_replace_all(string, '[:digit:]', '')

  # Terms are stripped one at a time, each as a whole word (\b ... \b).
  # Iterating over the vector itself is a no-op when it is empty, unlike
  # 1:length(x), which would yield c(1, 0).
  for (term in removal_terms) {
    clean_string <- str_replace_all(
      clean_string,
      paste0('\\b', term, '\\b'),
      ''
    )
  }

  # Zap spaces introduced by the above steps
  str_replace_all(clean_string, '[:space:]{2,}', ' ')
}

create_user_doc <- function(data, user) {
  # Builds one long "document" for a single user: keeps the rows of `data`
  # whose username equals `user`, joins their clean_post values with single
  # spaces, and passes the result through clean_text_for_cloud().
  user_rows <- filter(data, username == user)
  joined_posts <- paste(user_rows$clean_post, collapse = ' ')

  clean_text_for_cloud(joined_posts)
}

extract_freqs <- function(dtm, row) {
  # dtm: a document term matrix; row: the row number indexing one document.
  # Returns a data frame with columns `term` and `freq` for that document,
  # sorted by descending frequency and cut off after 200 rows.
  doc_terms <- as.matrix(t(dtm[row,]))
  freq_table <- rownames_to_column(data.frame(doc_terms))
  names(freq_table) <- c('term', 'freq')

  ranked <- arrange(freq_table, -freq)

  # The wordcloud function struggles with too-large data frames,
  # so only the first 200 rows are handed back
  head(ranked, n = 200)
}

format_post <- function(data) {
  # Builds an HTML link to a single post, shown as "#<post_id>".
  #
  # data: one record with `thread_id` and `post_id` entries, e.g. the named
  #       character vector that apply(df, 1, ...) passes for each row.
  # Returns the <a> element as a string.
  #
  # apply() converts the data frame to a character matrix first, and numeric
  # columns are padded to a common width in that conversion (" 5820" next to
  # "12314"). The ids are trimmed so the padding doesn't end up in the URL
  # or the link text.
  thread_id <- trimws(data[['thread_id']])
  post_id <- trimws(data[['post_id']])

  paste0(
    '<a href="https://forums.thousandroads.net/index.php?threads/',
    thread_id, '/post-', post_id, '">#', post_id, '</a>'
  )
}

format_thread <- function(data) {
  # Builds an HTML link to a thread, using the thread title as link text.
  #
  # data: one record with `thread_id` and `thread_title` entries, e.g. the
  #       named character vector that apply(df, 1, ...) passes for each row.
  # Returns the <a> element as a string.
  #
  # The id is trimmed because apply() pads numeric columns to a common width
  # when it converts the data frame to a character matrix. The title is left
  # exactly as given.
  thread_id <- trimws(data[['thread_id']])

  paste0(
    '<a href="https://forums.thousandroads.net/index.php?threads/',
    thread_id, '">', data[['thread_title']], '</a>'
  )
}

format_username <- function(data) {
  # Builds an HTML link to a member page, using the username as link text.
  #
  # data: one record with `user_id` and `username` entries, e.g. the named
  #       character vector that apply(df, 1, ...) passes for each row.
  # Returns the <a> element as a string.
  #
  # The id is trimmed because apply() pads numeric columns to a common width
  # when it converts the data frame to a character matrix. The username is
  # left exactly as given.
  user_id <- trimws(data[['user_id']])

  paste0(
    '<a href="https://forums.thousandroads.net/index.php?members/',
    user_id, '">', data[['username']], '</a>'
  )
}

get_highest_poster <- function(data) {
  # Home-made statistical "mode" for use inside summarize(): tabulates the
  # values in `data` and hands back the most frequent one.
  # The result is the table label, so it is always a character string,
  # even when `data` is numeric.
  tally <- table(data)
  ranked <- sort(tally, decreasing = TRUE)

  names(ranked[1])
}

user_bar_plot <- function(data, stratifier, xvar = 'username') {
  # Horizontal bar chart with one bar per value of `xvar`, ordered by and
  # labelled with the value of `stratifier`.
  #
  # data:       data frame holding both columns
  # stratifier: name (string) of the column supplying bar lengths and labels
  # xvar:       name (string) of the column supplying one bar per value
  # Returns a ggplot object, so callers can keep adding layers with `+`.
  symd <- sym(stratifier)
  u_symd <- sym(xvar)

  ggplot(data, aes(x = reorder(!!u_symd, !!symd), y = !!symd)) +
    geom_col(fill = '#240440') +
    # aes_string() is deprecated; it only accepted the bare symbol because
    # it expects strings or already-quoted expressions. Unquoting inside
    # aes() is the supported equivalent and matches the main mapping above.
    geom_text(aes(label = !!symd), hjust = -0.15) +
    # Extra room at the far end of the bars so the text labels aren't clipped
    scale_y_continuous(expand = expansion(mult = c(0,0.1))) +
    coord_flip() +
    theme_minimal() +
    xlab('')
}
# Read the complete set of posts from the tab-separated export;
# the column names are supplied explicitly
posts <- read_tsv(
  'blacklight_posts.txt',
  col_names = c(
    'post_id',
    'user_id',
    'thread_title',
    'thread_id',
    'username',
    'post'
  )
)

# Clean up our text a little
# Adds to `posts`:
#   clean_post - lowercased post text with BBCode and punctuation removed
#   wordcount  - number of whitespace-separated pieces in clean_post
#                (an empty clean_post still counts as 1)
#   hopecount  - occurrences of "hope"/"hopes" as whole words
#   hope_idx   - hopecount / wordcount
cleaned_posts <- posts %>%
  # A post that is nothing but bracketed text (e.g. a group comm post) would
  # be reduced to an empty string by the BBCode removal below. To rescue it,
  # drop just its first and last character instead
  mutate(clean_post = if_else(
    str_replace_all(post, '\\[[^\\]]*\\]', '') == "",
    # Delete leading/trailing square brackets
    substr(post, 2, nchar(post) - 1),
    post
  )) %>%
  # Zap BBCode
  mutate(clean_post = str_replace_all(clean_post, '\\[[^\\]]*\\]', '')) %>%
  # Force lowercase
  mutate(clean_post = tolower(clean_post)) %>%
  # Lazy regularization: remove 's
  mutate(clean_post = str_replace_all(clean_post, "'s\\b", '')) %>%
  # Em dashes would get deleted in the punctuation step, resulting in words
  # getting smooshed together. Swap them for spaces so the words stay separate
  mutate(clean_post = str_replace_all(clean_post, "--", ' ')) %>%
  mutate(clean_post = str_replace_all(clean_post, "—", ' ')) %>%
  # Ellipses can also cause this problem
  mutate(clean_post = str_replace_all(clean_post, "\\.{2,}", ' ')) %>%
  # Annihilate scene breaks
  mutate(clean_post = str_replace_all(clean_post, "[\\<\\>]", ' ')) %>%
  # Delete punctuation
  mutate(clean_post = str_replace_all(clean_post, '[:punct:]', '')) %>%
  # Transform runs of spaces into single spaces
  mutate(clean_post = str_replace_all(clean_post, '[:space:]{2,}', ' ')) %>%
  mutate(clean_post = str_trim(clean_post)) %>%
  # Count words based on space boundaries.
  # str_split() returns one vector of pieces per post, so lengths() gives the
  # per-post count directly; no rowwise() pass over every row is needed
  mutate(
    wordcount = lengths(str_split(clean_post, '\\s+')),
    hopecount = str_count(clean_post, '\\bhope[s]?\\b'),
    hope_idx = hopecount/wordcount
  )

# Further processing for word-cloud-ing
# For whatever reason tm_map's implementation of the stem word removal function takes so long
# it isn't practical even to look at the full corpus for some of the top posters!
# It must be doing something fancy, because my stupid regex version here handles it in a relative eyeblink
# 
# 
# # Make the whole of the RP into one big, long string
# fulltext <- paste(cleaned_posts$clean_post, collapse = ' ') %>%
#   # Eliminate numbers (e.g. damage reports)
#   str_replace_all('[:digit:]', '')
# 
# # hrrk, can't brain
# # can't think of how to map this
# # use a terrible loop instead...
# fulltext_clean <- fulltext
# for (i in 1:length(stopwords('english'))) {
#   fulltext_clean <- str_replace_all(fulltext_clean, paste0('\\b', stopwords('english')[i], '\\b'), '')
# }
# 
# # Do it again for character names
# for (i in 1:length(blc_characters)) {
#   fulltext_clean <- str_replace_all(fulltext_clean, paste0('\\b', blc_characters[i], '\\b'), '')
# }
# 
# # And finally, get rid of uninteresting, common words that float to the top
# # Let the record show that the actual most-used non-stop word in BLC was "like"
# nuisance_words <- c(
#   'like',
#   'just',
#   'said'
# )
# 
# for (i in 1:length(nuisance_words)) {
#   fulltext_clean <- str_replace_all(fulltext_clean, paste0('\\b', nuisance_words[i], '\\b'), '')
# }
# 
# # Zap spaces introduced by the above steps
# fulltext_clean <- str_replace_all(fulltext_clean, '[:space:]{2,}', ' ')
# 
# BLCorpus <- Corpus(VectorSource(fulltext_clean))
# 
# # Do some stemming
# BLCorpus_cleaned <- BLCorpus %>% tm_map(stemDocument)
# 
# # Create the term document matrix
# blc_tdm <- TermDocumentMatrix(BLCorpus_cleaned) %>%
#   as.matrix()
# 
# # Get word frequencies and construct wordcloud
# word_freqs <- sort(rowSums(blc_tdm),decreasing=TRUE)
# 
# word_freqs_frame <- data.frame(word = names(word_freqs),freq=word_freqs)
# 
# wordcloud(words = word_freqs_frame$word, freq = word_freqs_frame$freq, min.freq = 1,
#           max.words=200, random.order=FALSE, rot.per=0.35,
#           colors=rev(brewer.pal(8, "PuOr")))

# Per-user post totals, largest first; kept as a standalone object so the
# in-text references can use it
post_counts <- posts %>%
  add_count(username) %>%
  arrange(desc(n)) %>%
  distinct(user_id, username, n)

# Per-user word totals, largest first
word_counts <- cleaned_posts %>%
  group_by(username) %>%
  summarise(
    user_id = max(user_id),
    words = sum(wordcount)
  ) %>%
  arrange(desc(words))

# One row per thread: title, post count, word count, words per post, and
# the most frequent username / user_id among its posts. Busiest threads first
thread_summary <- cleaned_posts %>%
  group_by(thread_id) %>%
  summarise(
    thread_title = max(thread_title),
    count = n(),
    words = sum(wordcount),
    wpp = words/count,
    # get_highest_poster() returns the most common value as a string
    username = get_highest_poster(username),
    user_id = get_highest_poster(user_id)
  ) %>%
  arrange(desc(count))

Introduction

Blacklight was an extended roleplaying campaign that was first announced July 17, 2020 and which finally concluded 500 days later on November 29, 2021. With this document I'm going to take a look at some of the stats surrounding the RP--how many posts were made, who made the most of them, and so on and so forth.

For this analysis I'm looking at all non-sticky threads in the Blacklight Campaign forum and all its sub-forums. I wanted to focus on actual posts with role-playing, so cutting out e.g. the ~3k posts in the "Official Actions" thread that are mostly just people claiming points and redoing stats was desired. I've also removed the Art & Doodles and Prize Volunteers threads, which aren't actually RP content. They both also contain posts from people who didn't actually participate in the RP, which gums everything up! Deleted posts were also excluded.

With all that said, let's take a look at the results!

Overall Stats

When all was said and done, the RP ran for 25837 posts and 3014474 words! A total of 44 users contributed to the RP. Unsurprisingly, the most prolific poster was Namohysip, with 5780 posts, or 22.4% of the total. In second place was unrepentantAuthor with 2811, 10.9% of the total. The full breakdown of posts by user is below.

# Bar chart of total posts per user
user_bar_plot(post_counts, 'n') +
  labs(title = 'Blacklight Posts Per User', y = 'Total Posts')

The word count per user looks a little bit different! Unsurprisingly, the first couple slots are the same, as the sheer number of posts by the most prolific players ensure that even if they had shorter posts than the norm (rarely!) they were nevertheless putting in the most words overall. However, there are more shake-ups further down the list. Cresselia92 had the ninth-highest number of posts but jumped to 3rd place in terms of word count--slightly fewer posts, but with a lot of effort put into them! Spiteful Murkrow didn't crack the top ten word counts, but had the biggest discrepancy between post count and word count out of anyone, climbing nine places in the rankings. If he'd joined the RP at its outset, Kate might have ended up with one of the longest stories of any character!

# Bar chart of total words per user
user_bar_plot(word_counts, 'words') +
  labs(title = 'Total Words Per User', y = 'Total Words')

In addition to total words written, looking at the median words written per player gives a sense of their preferred RP style. Whether it's short and snappy back and forth or long posts full of description and introspection, there was a great deal of variety in what sort of posts different players gravitated towards. This is one of the few metrics where Namohysip doesn't land on top by a mile! I'm tickled by how many ties we see here, too.

# Median post length for each player, drawn as a bar chart
cleaned_posts %>%
  group_by(username) %>%
  summarise(med_words = median(wordcount)) %>%
  user_bar_plot('med_words') +
  labs(
    title = 'Median Words Per Post, by Player',
    y = 'Median Words Per Post'
  )

We can also take a look at the overall distribution of post length to get a sense of the RP's general vibe.

The mean word count per post was 116.7, and the median was 86, indicating that the typical RP post was roughly a paragraph in length. If people deviated from that, they were more likely to go shorter than longer. However, there were a few cases where people broke out truly epic-length replies, and these dragged the mean word count a little towards the right. Overall, Blacklight wasn't a "casual" RP, but neither was it one where players are expected to put in multiple paragraphs per reply, as is often the case for forum-based games.

# Histogram of post lengths; the x axis is limited to 0-1000 words
ggplot(cleaned_posts, aes(x = wordcount)) +
  geom_histogram(bins = 100, fill = '#240440') +
  scale_x_continuous(
    limits = c(0, 1000),
    breaks = seq(0, 1000, by = 100),
    expand = expansion(mult = c(0, 0.1))
  ) +
  scale_y_continuous(expand = expansion(mult = c(0,0.1))) +
  theme_minimal() +
  ggtitle('Overall Distribution of Words Per Post') +
  xlab('Words Per Post') +
  ylab('Count') +
  labs(caption = 'Sixty-four posts of more than 1000 words have been excluded in the interest of readability.')

So what were those ultralong posts? Let's take a look. Here are the top ten longest posts in the RP:

# The ten posts with the highest word counts
longest_posts <- head(arrange(cleaned_posts, -wordcount), n = 10)

# The formatting helpers work on one record at a time, so each one is run
# over the rows with apply() and the pieces are reassembled into a table
kable_styling(
  kable(
    data.frame(
      Post = apply(longest_posts, 1, format_post),
      Thread = apply(longest_posts, 1, format_thread),
      Player = apply(longest_posts, 1, format_username),
      Words = longest_posts$wordcount
    ),
    'html',
    escape = FALSE
  )
)
Post Thread Player Words
#49459 Rocky Road ~ House of Starr and Nate Negrek 3919
#28294 Poppin' Arms ~ Dorm Lounge Cresselia92 3370
#39214 [Support] Other Storms DragonD 2728
#39288 [Support] Other Storms Negrek 2704
#49558 Rocky Road ~ House of Dill, Saltriv, Shiron, and Vix Fusion 2631
#47646 Null Realm ~ Blacklight Eternity Namohysip 2610
#36850 Nil Plateaus Fusion 2547
#37368 Destiny Colosseum Namohysip 2469
#41597 The Moon Namohysip 2445
#38753 [Support] Other Storms RJR Basimilus 2108

The all-time longest post in the RP contains nearly 4,000 words and is over 500 words longer than the next-longest on the list. Perhaps someone got confused and thought they were writing a fanfic chapter instead of an RP post; not sure what's up with that. Overall Namohysip contributed the most of the top ten longest posts, with three, while Negrek and Fusion had two each. The "Other Storms" thread, intended for Support players to write scenes with their familiars while Front-line players fought them in individual threads, had the most of these longest posts. Front-line players probably ended up writing as many or more words over the course of their respective scenes during the boss fight, but as most Support players condensed their entire character progression into a single post without DM intervention, they ended up with much higher single-post word counts.

On the other end of the spectrum, let's take a look at the RP's shortest posts. Excluding posts that were solely an emoji, image, or scene break, here are all the single-word posts made over the course of the RP:

single_word_posts <- cleaned_posts %>%
  # Keep one-word posts, then weed out images and scene breaks by looking
  # for their markers in the raw post text.
  # Group Chat posts that were a single word outside of the address
  # (e.g. [Soda > Team Spectrum]) are dropped too: how many words they count
  # as is arguable, and this way is easier
  filter(
    wordcount == 1,
    !grepl('>', post),
    !grepl('ATTACH', post),
    !grepl('img', post),
    !grepl(':', post)
  ) %>%
  # For fun, turn the first italic/bold BBCode tags into their HTML versions
  mutate(
    post = post %>%
      str_replace('\\[I\\]', '<i>') %>%
      str_replace('\\[/I\\]', '</i>') %>%
      str_replace('\\[B\\]', '<b>') %>%
      str_replace('\\[/B\\]', '</b>')
  ) %>%
  arrange(post_id)

# Table of the single-word posts with links to post, thread and player
kable_styling(
  kable(
    data.frame(
      Post = apply(single_word_posts, 1, format_post),
      Thread = apply(single_word_posts, 1, format_thread),
      Player = apply(single_word_posts, 1, format_username),
      Content = single_word_posts$post
    ),
    'html',
    escape = FALSE
  )
)
Post Thread Player Content
# 5820 Treasure Town ~ Marowak Dojo Namohysip Later.
#12314 Group Communication TheGOAT Wuh?
#14593 Poppin' Arms Guild ~ Main Lobby Ambyssin “Delightful.”
#15695 Poppin' Arms ~ Large Dorms Shiny Phantump yes...”
#31689 Mystery Continent ~ Northern Pines Namohysip ...What.
#34464 Poppin' Arms Guild ~ Main Lobby IFBench Oh."
#34677 Mystery Continent ~ Northern Pines Ambyssin “Elaborate.”
#34900 Group Communication Ambyssin ... idiot.
#35087 Group Communication Fusion ...Ew.
#36511 Spirits' Edge Namohysip Creation.
#41899 Lush Prairie unrepentantAuthor Joule."
#43749 Destiny Colosseum unrepentantAuthor "Shit."
#47028 Destiny Village ~ Parfait Way Magyk "WHAT?!"
#47142 Destiny Village ~ Parfait Way Namohysip ...Brisa."

King of the single-word posts was Namohysip with four, followed by Ambyssin with three. The Group Communication thread had the most single-word posts, although it only contributed three of the fourteen. This thread contained a lot of short, quippy posts in general, so it's no surprise to see it showing up here. But funnily enough, none of Namohysip's single-word posts were in that thread!

Thread Statistics

Let's take a look at threads next. The RP consisted of 99 different threads, including the special Eterna Storm threads that housed one player scene each. Here are the most popular threads in the RP, based on total number of posts:

# Table of the first ten rows of thread_summary (sorted by post count above)
thread_summary %>%
  head(n = 10) %>%
  {data.frame(
    Thread = apply(., 1, format_thread),
    Posts = .$count,
    Words = .$words,
    `Player with Most Posts` = apply(., 1, format_username)
  )} %>%
  kable(
    'html',
    col.names = c('Thread', 'Posts', 'Words', 'Player with Most Posts'),
    escape = FALSE
  ) %>%
  kable_styling()
Thread Posts Words Player with Most Posts
Grass Continent ~ Treasure Town Beach 1213 115976 Adamhuarts
Destiny Village ~ Parfait Way 1162 107786 Namohysip
Destiny Village ~ Central Crossroads 1159 116458 Namohysip
Destiny Colosseum 1141 181279 Namohysip
Destiny Village ~ X-Eye Cauldron 874 91652 Dragonfree
Mystery Continent ~ Northern Pines 846 95335 Namohysip
Poppin' Arms Guild ~ Main Lobby 797 97993 Namohysip
Destiny Colosseum ~ Training Rooms 785 89189 unrepentantAuthor
Destiny Village ~ Sundae Park 759 75576 Namohysip
Destiny Tower 744 80672 Namohysip

Unsurprisingly, Namohysip contributed the greatest number of posts to the majority of these (and all) threads. However, we can see some fun character traits shining through in the most common posters in the other threads. Grass Continent ~ Treasure Town Beach is unusual to begin with, as not only the longest thread but also the only among the top ten that isn't in Destiny Village, and it turns out that Cynthian's its biggest visitor! Similarly, Brisa was the character to make the most use of the Destiny Colosseum ~ Training Rooms thread, while Dave was Destiny Village ~ X-Eye Cauldron's biggest customer!

The shortest threads aren't terribly interesting; unsurprisingly, these are mostly made up of character-specific Eterna Storm threads, which by their nature tend to be short. The shortest thread that represents an actual location as such is Void Core, with 17 posts, although even that thread was somewhat restricted in how many/how often players could access it.

We can also look at the threads with the meatiest posts.

# Table of the ten threads with the highest words-per-post
thread_summary %>%
  arrange(desc(wpp)) %>%
  head(n = 10) %>%
  {data.frame(
    Thread = apply(., 1, format_thread),
    wpp = round(.$wpp, digits = 1),
    `Player with Most Posts` = apply(., 1, format_username)
  )} %>%
  kable(
    'html',
    col.names = c('Thread', 'Words Per Post', 'Player with Most Posts'),
    escape = FALSE
  ) %>%
  kable_styling()
Thread Words Per Post Player with Most Posts
[Support] Other Storms 1577.8 Negrek
[Owen] Hot Spot Cave 540.6 Namohysip
[Icetales] Core of Chaos 302.8 Cresselia92
[Vix] Grand Arena 295.6 Fusion
The Moon 287.6 Namohysip
Radiant Void 278.2 Namohysip
Nil Plateaus 250.0 Fusion
Worldcore 246.3 Namohysip
Null Realm ~ Blacklight Eternity 233.0 IFBench
[Koa] Strange Building 221.2 Flyg0n

A full half of these threads are associated with the Eterna Storm boss battle, and particularly interesting to me are those belonging to Namohysip and Flyg0n, who didn't have particularly high word counts per post overall but who ended up in the top ten with their respective Eterna Storm threads. Clearly people pulled out all the stops for that event!

In fact, with the sole exception of Nil Plateaus, these are all boss battle threads; it probably isn't surprising that people put out some of their longest posts for those, Namohysip almost certainly among them! It's also appropriate to see the very last battle of the RP, Null Realm ~ Blacklight Eternity, appearing among the top ten. Things certainly went out with a bang!

Once again, considering the threads with the lowest number of words per post isn't tremendously interesting. Group Communication lands square on the bottom as expected, at a whopping 37.5 words per post.

Hopeful Statistics

We've looked at a number of broad-strokes metrics relating to how many words are present in posts or threads. Let's now zoom in a bit and consider what words are showing up. In particular, because hope is a major theme of the RP, we can see how often it comes up.

In the end, "hope" or "hopes" was used 1228 times over the course of the RP, or roughly once per 2454.8 words. We can also take a look at the RP's various players and construct for them a "hope index," a measure of how many times they use "hope" or "hopes" per 1000 words.

# Uses of "hope"/"hopes" per 1000 words for each player, as a bar chart
cleaned_posts %>%
  group_by(username) %>%
  summarise(
    hope_idx = round(sum(hopecount)/sum(wordcount) * 1000, digits = 2)
  ) %>%
  user_bar_plot('hope_idx') +
  labs(title = 'Hope Index by Player', y = '"Hope" Per 1000 Words')

I find the results here pretty surprising! To some extent, this measure favors players who tend to make shorter posts, although Cresselia92 has a pretty respectable hope index, despite putting in a lot of words in general. And it perhaps isn't surprising that Namohysip doesn't appear higher on the list, as he was playing all the NPCs, some of whom aren't especially hopeful. I'd be curious to see how his ranking might change if this analysis were limited to Owen posts! Finally, Ambyssin's top-half placement is interesting to me. Bahamut is a rather notoriously unhopeful character, but one who perhaps talked about it more often than many others--fixated on it in a negative way, perhaps. Unfortunately this analysis doesn't look at any of the context around the words involved, so a character dismissing hope reads as "hopeful" as one embracing it.

We can also look at the most hopeful posts as well as the most hopeful players:

# The ten posts with the highest per-post hope index, prepared for display
# in an HTML table that is rendered without escaping
most_hopeful_posts <- cleaned_posts %>%
  arrange(-hope_idx) %>%
  head(n = 10) %>%
  mutate(
    # Escape HTML-significant characters in the raw post text first, so that
    # literal '<' and '>' show up as text. This has to happen before the
    # BBCode conversion below, otherwise the freshly inserted <i> tags would
    # be escaped as well. '&' goes first so the entities themselves survive
    post = str_replace_all(post, '&', '&amp;'),
    post = str_replace_all(post, '<', '&lt;'),
    post = str_replace_all(post, '>', '&gt;'),
    # Convert every italic BBCode tag, not just the first one in the post
    post = str_replace_all(post, '\\[I\\]', '<i>'),
    post = str_replace_all(post, '\\[/I\\]', '</i>')
  )

data.frame(
  Post = apply(most_hopeful_posts, 1, format_post),
  Thread = apply(most_hopeful_posts, 1, format_thread),
  Player = apply(most_hopeful_posts, 1, format_username),
  Content = most_hopeful_posts$post
) %>%
  kable('html', escape = FALSE) %>%
  kable_styling()
Post Thread Player Content
#47185 Rocky Road ~ House of Dill, Saltriv, Shiron, and Vix IFBench Hope that I'll be able to help. Hope that I can save others. Hope that I can make things better," Saltriv answered, smiling brightly.
#15834 Poppin' Arms Guild ~ Infirmary Persephone Oh! Yeah, I'll take as many as you want to give. Thank you!" Still hope. Still. Hope.
#21487 Group Communication Virgil134
#47915 Destiny Village ~ Central Crossroads Ambyssin Bahamut smirked. "Fortunately for you, hope's a big thing here."
#45780 Rocky Road ~ House of Diyem, Eien, Mellow, and Toast IFBench Saltriv's eyes widened, and a glimmer of hope sparkled within them. There was hope still yet. "Thank you!" they exclaimed.
#45366 Poppin' Arms Guild ~ Main Lobby unrepentantAuthor Brisa winced, and looked to Diyem in hopes of being wrong.
#45777 Rocky Road ~ House of Diyem, Eien, Mellow, and Toast Sudmensch Dill looked back up, blinking as his hopes pricked back up.
#32755 Rocky Road ~ House of Brisa, Dave, Koa, and Owen Dragonfree Dave raised an eyebrow. "Therapy? I it's not with fucking Bean."
#42307 Destiny Village ~ Parfait Way Ambyssin “You don’t actually turn Blacklight type.” Nero shook his head. “At least, I hope not.”
#33299 Rocky Road ~ House of Dill, Saltriv, Shiron, and Vix Navar I'm Shiron!" he waved and smiled. "Nice to meet you. Hope we can be friends!"

Taking a look at the results reveals some of the weaknesses of this analysis. It favors short posts that happen to have hope mentioned as little as once, rather than posts that evoke it multiple times (more on that later), and it's oblivious to context--not all of the uses here are addressing "hope" as a concept, and not all of them are necessarily positive about hope, which is maybe contrary to the themes of the RP.

Nevertheless, I think IFBench's "most hopeful post" here does capture the spirit of the RP perfectly. What's more Blacklight than "Hope that I'll be able to help. Hope that I can save others. Hope that I can make things better"?

Let's take one last, brief look at those posts that have the highest total uses of the words "hope" or "hopes," regardless of their length:

# The ten posts with the most uses of "hope"/"hopes"; among posts with the
# same count, the higher post_id comes first
most_hopes_posts <- head(arrange(cleaned_posts, -hopecount, -post_id), n = 10)

kable_styling(
  kable(
    data.frame(
      Post = apply(most_hopes_posts, 1, format_post),
      Thread = apply(most_hopes_posts, 1, format_thread),
      Player = apply(most_hopes_posts, 1, format_username),
      hopenum = most_hopes_posts$hopecount
    ),
    'html',
    escape = FALSE,
    col.names = c('Post', 'Thread', 'Player', 'Number of Hopes')
  )
)
Post Thread Player Number of Hopes
#20192 Sand Continent ~ Sand Dune of Spirits Cresselia92 10
#38396 Mystery Continent ~ Northern Pines TheGOAT 6
#28294 Poppin' Arms ~ Dorm Lounge Cresselia92 5
#25499 Reverse World ~ Downsideup City Cresselia92 5
#49477 Offscreen Activities Namohysip 4
#49392 Offscreen Activities Negrek 4
#48820 Destiny Colosseum Namohysip 4
#47641 Null Realm ~ Blacklight Eternity Cresselia92 4
#22566 Abandoned Laboratory unrepentantAuthor 4
#47646 Null Realm ~ Blacklight Eternity Namohysip 3

Here Cresselia92's hopeful posting really shines through! She has the lion's share of the top ten posts by hope count, as well as the most-hopeful post by count overall, which if you click through and read it is definitely embracing Blacklight's themes! All in all there were 10 posts in the RP that used "hope" or "hopes" more than once. Namohysip actually had the greatest number of these, at 3, but Cresselia92 ended up right behind with 4. Although she didn't necessarily top the list on all of the hope-related stats, she consistently placed high and clearly created a character that strongly resonated with the RP's themes.

As a fun quirk of the data, all of the "hopes" in Namohysip's Offscreen Activities post actually came from Negrek's Offscreen Activities post: they were simply contained in quotes. Another limitation of this analysis!

Individual Stats

Finally, I'm going to break things out for each player individually, both so people can look up their numbers in a single, convenient place, and to add a couple new analyses, too!

Most notably, for players with at least 100 posts, I've included a word cloud that shows that player's most-used distinctive words. Unfortunately, the word cloud generator kept exploding for people with fewer than 100 posts, so I've had to exclude that analysis for them.

A user's most distinctive words aren't necessarily the ones they use most often, but they are the ones they used most often relative to other players in the RP. In general, everybody's actual most common words are things like, well, "like," "said," etc. One side-effect of this is that proper names tend to show up really strongly--in general, nobody mentioned a player character's name more than their own player! I've therefore filtered out the names of player characters and familiars in order to try and let more interesting words rise to the top. It was kind of fun to see clouds essentially "labelled" with a great big "CHARACTER NAME" plopped in the middle of each cloud, but it made it hard to read the more-interesting words that were pushed to the margins.

In that vein, I don't know what's going on with TheGOAT's cloud, where the usual name-cleaning function seems to have failed. Somehow he used a lot of words that got stemmed down to "astrid" but didn't exactly match "astrid." Fortunately the clean-up worked at least a little, since "astrid" isn't nearly as dominant in that cloud as it would be if most instances of her name hadn't gotten zapped, but in general I have no idea what's going on in that cloud, and I blame TheGOAT. What even is that... meteorite... emoji... thing?

# One function call per player should spit out that player's whole stats section,
# so the driver below can simply walk over the list of usernames
#
# display_player() writes, in order:
#   - an <h3> heading plus post count, total/median words, and the longest post
#   - a histogram of words per post
#   - the thread with the most posts and the hope totals
#   - a word cloud, but only for players with at least 100 posts
#   - a horizontal rule
#
# data:       posts data frame; reads username, wordcount, thread_id, hopecount
# word_freqs: list indexed by username; each element supplies $term and $freq
# user:       the username to report on
#
# Called for its printed output only

display_player <- function(data, word_freqs, user) {

  player_posts <- filter(data, username == user)

  words_histogram <- ggplot(player_posts, aes(x = wordcount)) +
    geom_histogram(fill = '#240440') +
    scale_y_continuous(expand = expansion(mult = c(0,0.1))) +
    theme_minimal() +
    ggtitle(paste('Distribution of Words Per Post for', user)) +
    xlab('Words Per Post') +
    ylab('Count')

  # Headline numbers; the longest post is the first row after sorting by length
  total_words <- sum(player_posts$wordcount)
  longest_post <- arrange(player_posts, desc(wordcount))[1,]

  cat(paste0(
    '<h3>', user,
    '</h3>\n**Number of Posts:** ', nrow(player_posts),
    '\n\n**Total Words:** ', total_words, ' (median ', median(player_posts$wordcount), ' words per post)',
    '\n\n**Longest Post:** ', format_post(longest_post), ' (', longest_post[[1, 'wordcount']], ' words)',
    '\n\n'
  ))

  print(words_histogram)

  # add_count() tags every post with its thread's post count (column n),
  # so the top row after sorting belongs to the busiest thread
  busiest_thread <- arrange(add_count(player_posts, thread_id), desc(n))[1,]
  total_hopes <- sum(player_posts$hopecount)
  hopes_per_thousand <- round(total_hopes * 1000 / total_words, digits = 2)

  cat(paste0(
    '\n\n**Posted Most In:** ', format_thread(busiest_thread), ' (', busiest_thread[[1, 'n']], ' posts)',
    '\n\n**Number of Hopes:** ', total_hopes, ' (', hopes_per_thousand, ' per 1000 words)',
    '\n\n'
  ))

  # Word cloud to finish, reserved for players with 100 or more posts
  if (nrow(player_posts) >= 100) {
    cat(paste0('<h4>Most Distinct Words for ', user, '</h4>'))
    player_freqs <- word_freqs[[user]]
    wordcloud(
      words = player_freqs$term,
      freq = player_freqs$freq,
      max.words = 200,
      random.order = FALSE,
      rot.per = 0.35,
      colors = rev(brewer.pal(8, "Dark2"))
    )
  }

  cat('\n\n*****\n\n')

}

# We're going to make word clouds for everybody!
# In order to make TF-IDF go
# (and trust me, TF-IDF produces *far* more amusing results than straight term frequency)
# we need to create a corpus where each document is the sum total of a player's blacklight posts
# When we reach their individual print step, we'll then fetch the info on their doc specifically from the results
distinct_users <- sort(distinct(cleaned_posts, username)$username)

# Create the user lookup that we really should have been using since the very beginning, honestly
# Maps each username to its position in distinct_users, which is also the position
# of that player's document in the corpus built from user_docs below
# seq_along() gives an empty lookup for zero users, where seq(1, 0) would give c(1, 0)
user_lookup <- as.list(setNames(seq_along(distinct_users), distinct_users))

# One cleaned document per player, in the same order as distinct_users
user_docs <- map_chr(distinct_users, ~ create_user_doc(cleaned_posts, .x))

BLCorpus <- Corpus(VectorSource(user_docs))

# Do some stemming
# NOTE: this runs after create_user_doc() has already stripped character names
# as whole words, so a word that only becomes a name once stemmed is not filtered
BLCorpus_cleaned <- BLCorpus %>% tm_map(stemDocument)

# Rows are players (documents), columns are stemmed terms, cells are TF-IDF weights
dtm <- DocumentTermMatrix(BLCorpus_cleaned, control = list(weighting = weightTfIdf))

# Pull each player's row out of the matrix and key the results by username
user_word_freqs <- setNames(
  map(distinct_users, ~ extract_freqs(dtm, user_lookup[[.x]])),
  distinct_users
)

# Print every player's section, in alphabetical order
walk(distinct_users, ~ display_player(cleaned_posts, user_word_freqs, .x))

Adamhuarts

Number of Posts: 1304

Total Words: 123652 (median 78 words per post)

Longest Post: #6338 (795 words)

Posted Most In: Grass Continent ~ Treasure Town Beach (150 posts)

Number of Hopes: 75 (0.61 per 1000 words)

Most Distinct Words for Adamhuarts


Ambyssin

Number of Posts: 2085

Total Words: 145310 (median 49 words per post)

Longest Post: #25503 (1207 words)

Posted Most In: Destiny Village ~ Parfait Way (184 posts)

Number of Hopes: 75 (0.52 per 1000 words)

Most Distinct Words for Ambyssin


BlackHairedPsycho

Number of Posts: 18

Total Words: 382 (median 19.5 words per post)

Longest Post: #26173 (65 words)

Posted Most In: Poppin' Arms Guild ~ Infirmary (9 posts)

Number of Hopes: 0 (0 per 1000 words)


Chibi Pika

Number of Posts: 1099

Total Words: 128155 (median 97 words per post)

Longest Post: #36996 (946 words)

Posted Most In: Destiny Colosseum (130 posts)

Number of Hopes: 27 (0.21 per 1000 words)

Most Distinct Words for Chibi Pika


Cresselia92

Number of Posts: 1047

Total Words: 243309 (median 181 words per post)

Longest Post: #28294 (3370 words)

Posted Most In: Poppin' Arms ~ Dorm Lounge (93 posts)

Number of Hopes: 163 (0.67 per 1000 words)

Most Distinct Words for Cresselia92


DeliriousAbsol

Number of Posts: 2

Total Words: 290 (median 145 words per post)

Longest Post: #5136 (235 words)

Posted Most In: [Inaccessible] Veritas City (1 posts)

Number of Hopes: 0 (0 per 1000 words)


Disco

Number of Posts: 621

Total Words: 36905 (median 47 words per post)

Longest Post: #9915 (337 words)

Posted Most In: Tree of Life ~ Exterior (84 posts)

Number of Hopes: 14 (0.38 per 1000 words)

Most Distinct Words for Disco


DragonD

Number of Posts: 55

Total Words: 18694 (median 211 words per post)

Longest Post: #39214 (2728 words)

Posted Most In: Shaymin Village (19 posts)

Number of Hopes: 4 (0.21 per 1000 words)


Dragonfree

Number of Posts: 1503

Total Words: 146268 (median 76 words per post)

Longest Post: #19454 (1490 words)

Posted Most In: Destiny Village ~ X-Eye Cauldron (171 posts)

Number of Hopes: 41 (0.28 per 1000 words)

Most Distinct Words for Dragonfree


Equitial

Number of Posts: 56

Total Words: 5769 (median 80.5 words per post)

Longest Post: #5995 (330 words)

Posted Most In: Marowak Dojo ~ Living Quarters (19 posts)

Number of Hopes: 1 (0.17 per 1000 words)


ExplosiveTurkey

Number of Posts: 204

Total Words: 15145 (median 66.5 words per post)

Longest Post: #43141 (285 words)

Posted Most In: Grass Continent ~ Treasure Town Beach (36 posts)

Number of Hopes: 6 (0.4 per 1000 words)

Most Distinct Words for ExplosiveTurkey


Flyg0n

Number of Posts: 689

Total Words: 88248 (median 105 words per post)

Longest Post: #39367 (827 words)

Posted Most In: Rocky Road ~ House of Brisa, Dave, Koa, and Owen (115 posts)

Number of Hopes: 31 (0.35 per 1000 words)

Most Distinct Words for Flyg0n


Fusion

Number of Posts: 1128

Total Words: 148128 (median 108 words per post)

Longest Post: #49558 (2631 words)

Posted Most In: Destiny Village ~ Central Crossroads (70 posts)

Number of Hopes: 41 (0.28 per 1000 words)

Most Distinct Words for Fusion


IFBench

Number of Posts: 544

Total Words: 29020 (median 35 words per post)

Longest Post: #41340 (446 words)

Posted Most In: Rocky Road ~ House of Dill, Saltriv, Shiron, and Vix (104 posts)

Number of Hopes: 25 (0.86 per 1000 words)

Most Distinct Words for IFBench


Jedi Shulk

Number of Posts: 98

Total Words: 3760 (median 21 words per post)

Longest Post: #5833 (353 words)

Posted Most In: Treasure Town ~ Main Square (26 posts)

Number of Hopes: 1 (0.27 per 1000 words)


kintsugi

Number of Posts: 2

Total Words: 191 (median 95.5 words per post)

Longest Post: #5145 (187 words)

Posted Most In: [Inaccessible] Veritas City (1 posts)

Number of Hopes: 0 (0 per 1000 words)


kyeugh

Number of Posts: 19

Total Words: 3619 (median 167 words per post)

Longest Post: #9456 (551 words)

Posted Most In: Treasure Town ~ Main Square (6 posts)

Number of Hopes: 4 (1.11 per 1000 words)


MadderJacker

Number of Posts: 136

Total Words: 19650 (median 127.5 words per post)

Longest Post: #45911 (443 words)

Posted Most In: Destiny Village ~ X-Eye Cauldron (28 posts)

Number of Hopes: 5 (0.25 per 1000 words)

Most Distinct Words for MadderJacker


Magyk

Number of Posts: 117

Total Words: 11964 (median 88 words per post)

Longest Post: #45480 (282 words)

Posted Most In: Destiny Village ~ Parfait Way (41 posts)

Number of Hopes: 4 (0.33 per 1000 words)

Most Distinct Words for Magyk


MidnightMutetation

Number of Posts: 4

Total Words: 472 (median 101.5 words per post)

Longest Post: #7263 (213 words)

Posted Most In: [Inaccessible] Veritas City (4 posts)

Number of Hopes: 0 (0 per 1000 words)


Namohysip

Number of Posts: 5780

Total Words: 763743 (median 89 words per post)

Longest Post: #47646 (2610 words)

Posted Most In: Destiny Colosseum (399 posts)

Number of Hopes: 260 (0.34 per 1000 words)

Most Distinct Words for Namohysip


Navar

Number of Posts: 284

Total Words: 19573 (median 51 words per post)

Longest Post: #25498 (662 words)

Posted Most In: Rocky Road ~ House of Dill, Saltriv, Shiron, and Vix (56 posts)

Number of Hopes: 22 (1.12 per 1000 words)

Most Distinct Words for Navar


NebulaDreams

Number of Posts: 312

Total Words: 36200 (median 84 words per post)

Longest Post: #10307 (1044 words)

Posted Most In: Spinda Cafe (62 posts)

Number of Hopes: 10 (0.28 per 1000 words)

Most Distinct Words for NebulaDreams


Negrek

Number of Posts: 1443

Total Words: 233417 (median 129 words per post)

Longest Post: #49459 (3919 words)

Posted Most In: Destiny Village ~ Chip's Lab (124 posts)

Number of Hopes: 84 (0.36 per 1000 words)

Most Distinct Words for Negrek


ntmymike

Number of Posts: 44

Total Words: 2565 (median 48 words per post)

Longest Post: #32831 (170 words)

Posted Most In: Poppin' Arms ~ Dorm Lounge (14 posts)

Number of Hopes: 1 (0.39 per 1000 words)


Persephone

Number of Posts: 347

Total Words: 21742 (median 50 words per post)

Longest Post: #26381 (632 words)

Posted Most In: Destiny Village ~ Sundae Park (41 posts)

Number of Hopes: 7 (0.32 per 1000 words)

Most Distinct Words for Persephone


Phoenixsong

Number of Posts: 123

Total Words: 28902 (median 211 words per post)

Longest Post: #29154 (1109 words)

Posted Most In: Tree of Life ~ Exterior (15 posts)

Number of Hopes: 13 (0.45 per 1000 words)

Most Distinct Words for Phoenixsong


Readerlove

Number of Posts: 52

Total Words: 4606 (median 81.5 words per post)

Longest Post: #45736 (279 words)

Posted Most In: Destiny Village ~ X-Eye Cauldron (29 posts)

Number of Hopes: 3 (0.65 per 1000 words)


RJR Basimilus

Number of Posts: 251

Total Words: 21914 (median 72 words per post)

Longest Post: #38753 (2108 words)

Posted Most In: Destiny Village ~ Sundae Park (44 posts)

Number of Hopes: 7 (0.32 per 1000 words)

Most Distinct Words for RJR Basimilus


Shadow of Antioch

Number of Posts: 39

Total Words: 4018 (median 100 words per post)

Longest Post: #41877 (213 words)

Posted Most In: Grass Continent ~ Treasure Town Beach (21 posts)

Number of Hopes: 2 (0.5 per 1000 words)


Shiny Phantump

Number of Posts: 583

Total Words: 46887 (median 63 words per post)

Longest Post: #38721 (1449 words)

Posted Most In: Sharpedo Bluff (59 posts)

Number of Hopes: 26 (0.55 per 1000 words)

Most Distinct Words for Shiny Phantump


Soniclink137

Number of Posts: 37

Total Words: 3486 (median 81 words per post)

Longest Post: #42169 (263 words)

Posted Most In: Mystery Continent ~ Northern Pines (23 posts)

Number of Hopes: 7 (2.01 per 1000 words)


SparklingEspeon

Number of Posts: 133

Total Words: 8889 (median 53 words per post)

Longest Post: #8031 (291 words)

Posted Most In: Treasure Town ~ Marketplace (33 posts)

Number of Hopes: 2 (0.22 per 1000 words)

Most Distinct Words for SparklingEspeon


Spiteful Murkrow

Number of Posts: 218

Total Words: 42377 (median 157 words per post)

Longest Post: #27665 (981 words)

Posted Most In: Rocky Road ~ House of Kate, Cabot, and Nip (34 posts)

Number of Hopes: 6 (0.14 per 1000 words)

Most Distinct Words for Spiteful Murkrow


Sudmensch

Number of Posts: 227

Total Words: 8112 (median 29 words per post)

Longest Post: #49610 (165 words)

Posted Most In: Rocky Road ~ House of Dill, Saltriv, Shiron, and Vix (61 posts)

Number of Hopes: 9 (1.11 per 1000 words)

Most Distinct Words for Sudmensch


Tanuki

Number of Posts: 285

Total Words: 30147 (median 92 words per post)

Longest Post: #15513 (620 words)

Posted Most In: Treasure Town ~ Marowak Dojo (60 posts)

Number of Hopes: 9 (0.3 per 1000 words)

Most Distinct Words for Tanuki


TheGOAT

Number of Posts: 576

Total Words: 91571 (median 130.5 words per post)

Longest Post: #47308 (1882 words)

Posted Most In: Destiny Colosseum (96 posts)

Number of Hopes: 35 (0.38 per 1000 words)

Most Distinct Words for TheGOAT


TheLastRanger13

Number of Posts: 18

Total Words: 1206 (median 69.5 words per post)

Longest Post: #43292 (176 words)

Posted Most In: Destiny Colosseum ~ Training Rooms (10 posts)

Number of Hopes: 1 (0.83 per 1000 words)


The Walrein

Number of Posts: 70

Total Words: 13853 (median 157 words per post)

Longest Post: #7520 (1089 words)

Posted Most In: Treasure Town ~ Main Square (17 posts)

Number of Hopes: 2 (0.14 per 1000 words)


Torchic

Number of Posts: 113

Total Words: 13042 (median 95 words per post)

Longest Post: #18043 (482 words)

Posted Most In: Marowak Dojo ~ Living Quarters (21 posts)

Number of Hopes: 3 (0.23 per 1000 words)

Most Distinct Words for Torchic


Umbramatic

Number of Posts: 31

Total Words: 741 (median 20 words per post)

Longest Post: #6566 (52 words)

Posted Most In: Treasure Town ~ Marowak Dojo (9 posts)

Number of Hopes: 0 (0 per 1000 words)


unrepentantAuthor

Number of Posts: 2811

Total Words: 330332 (median 97 words per post)

Longest Post: #12917 (1041 words)

Posted Most In: Destiny Colosseum (290 posts)

Number of Hopes: 164 (0.5 per 1000 words)

Most Distinct Words for unrepentantAuthor


Virgil134

Number of Posts: 958

Total Words: 77360 (median 59 words per post)

Longest Post: #49147 (878 words)

Posted Most In: Grass Continent ~ Treasure Town Beach (121 posts)

Number of Hopes: 12 (0.16 per 1000 words)

Most Distinct Words for Virgil134


windskull

Number of Posts: 371

Total Words: 40860 (median 98 words per post)

Longest Post: #25484 (390 words)

Posted Most In: Marowak Dojo ~ Living Quarters (58 posts)

Number of Hopes: 26 (0.64 per 1000 words)

Most Distinct Words for windskull