Skip to content

Commit 6d54c86

Browse files
committed
Add /stats command
1 parent b003445 commit 6d54c86

File tree

4 files changed

+90
-2
lines changed

4 files changed

+90
-2
lines changed

README.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,10 @@ an admin, simply send the command with a user mention following it, and confirm
4040
`/deleteuserdata @some_user`. As with the `/msg` command, just type the `@` character and select a user from the dropdown that
4141
opens.
4242

43+
### /stats
44+
The `/stats` command shows a list of each user's top five most distinguishing words - words they use the most, compared
45+
to everyone else in the group.
46+
4347
## Running the Bot
4448

4549
Create a Telegram bot via @BotFather. Take down your bot's access token, and set its privacy mode to disabled so it can
@@ -50,6 +54,7 @@ read all messages in its groups. If privacy mode is enabled, the bot won't be ab
5054
deletemydata - Delete your Markov chain data in this group
5155
deletemessagedata - Delete a message from your Markov chain data in this group
5256
deleteuserdata - (Admin only) Delete a user's Markov chain data in this group
57+
stats - Display user statistics
5358

5459
Download the precompiled jar file from the latest [release](https://github.com/ClockVapor/markov-telegram-bot/releases)
5560
or build it yourself with Maven if you like. If you aren't building it yourself, skip the next paragraph.

pom.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77

88
<groupId>clockvapor.telegram.markov-telegram-bot</groupId>
99
<artifactId>markov-telegram-bot</artifactId>
10-
<version>0.1.1</version>
10+
<version>0.2.1</version>
1111

1212
<properties>
1313
<kotlin.version>1.3.21</kotlin.version>

src/main/kotlin/clockvapor/telegram/markov/MarkovTelegramBot.kt

Lines changed: 40 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,9 @@ class MarkovTelegramBot(private val token: String, private val dataPath: String)
113113
matchesCommand(e0Text, "msgall") ->
114114
doMessageTotalCommand(bot, message, chatId, text, entities)
115115

116+
matchesCommand(e0Text, "stats") ->
117+
doStatisticsCommand(bot, message, chatId, text)
118+
116119
matchesCommand(e0Text, "deletemydata") ->
117120
doDeleteMyDataCommand(bot, message, chatId, senderId)
118121

@@ -136,6 +139,7 @@ class MarkovTelegramBot(private val token: String, private val dataPath: String)
136139
val deleteMessageData = wantToDeleteMessageData[chatId]
137140

138141
if (deleteOwnData?.contains(senderId) == true) {
142+
bot.sendChatAction(chatId.toLong(), ChatAction.TYPING)
139143
shouldAnalyzeMessage = false
140144
deleteOwnData -= senderId
141145
if (deleteOwnData.isEmpty()) {
@@ -152,6 +156,7 @@ class MarkovTelegramBot(private val token: String, private val dataPath: String)
152156
}
153157
reply(bot, message, replyText)
154158
} else if (deleteUserData?.contains(senderId) == true) {
159+
bot.sendChatAction(chatId.toLong(), ChatAction.TYPING)
155160
shouldAnalyzeMessage = false
156161
val userIdToDelete = deleteUserData[senderId]!!
157162
deleteUserData -= senderId
@@ -169,6 +174,7 @@ class MarkovTelegramBot(private val token: String, private val dataPath: String)
169174
}
170175
reply(bot, message, replyText)
171176
} else if (deleteMessageData?.contains(senderId) == true) {
177+
bot.sendChatAction(chatId.toLong(), ChatAction.TYPING)
172178
shouldAnalyzeMessage = false
173179
val messageToDelete = deleteMessageData[senderId]!!
174180
deleteMessageData -= senderId
@@ -193,6 +199,7 @@ class MarkovTelegramBot(private val token: String, private val dataPath: String)
193199
private fun doMessageCommand(bot: Bot, message: Message, chatId: String, text: String,
194200
entities: List<MessageEntity>) {
195201

202+
bot.sendChatAction(chatId.toLong(), ChatAction.TYPING)
196203
var parseMode: ParseMode? = null
197204

198205
val replyText = if (entities.size < 2) {
@@ -237,6 +244,7 @@ class MarkovTelegramBot(private val token: String, private val dataPath: String)
237244
private fun doMessageTotalCommand(bot: Bot, message: Message, chatId: String, text: String,
238245
entities: List<MessageEntity>) {
239246

247+
bot.sendChatAction(chatId.toLong(), ChatAction.TYPING)
240248
val e0 = entities[0]
241249
val remainingTexts = text.substring(e0.offset + e0.length).trim().takeIf { it.isNotBlank() }
242250
?.split(whitespaceRegex).orEmpty()
@@ -257,7 +265,32 @@ class MarkovTelegramBot(private val token: String, private val dataPath: String)
257265
reply(bot, message, replyText)
258266
}
259267

268+
/**
 * Handles the `/stats` command by replying with each chat member's five most distinguishing words.
 *
 * Reads every personal Markov chain stored for the chat (chains that fail to load are skipped),
 * scores each user's words against the combined word counts of all loaded users, and replies with
 * one numbered list per user. Users whose chat-member lookup yields no result are omitted. If
 * nothing can be listed, the reply is `<no data available>`.
 *
 * @param bot the bot used for the typing action, member lookups and the reply
 * @param message the message that triggered the command; the reply is sent in response to it
 * @param chatId ID of the chat, as a string that must parse as a Long
 * @param text the command text (not used by this handler)
 */
private fun doStatisticsCommand(bot: Bot, message: Message, chatId: String, text: String) {
    bot.sendChatAction(chatId.toLong(), ChatAction.TYPING)

    // The user ID is taken from the chain's file name (without extension).
    val wordCountsByUserId = getAllPersonalMarkovPaths(chatId)
        .mapNotNull { path ->
            val chain = tryOrNull { MarkovChain.read(path) }
            chain?.let { Pair(File(path).nameWithoutExtension, it.wordCounts) }
        }
        .toMap()
    val universe = computeUniverse(wordCountsByUserId.values)

    val sections = mutableListOf<String>()
    for ((userId, wordCounts) in wordCountsByUserId) {
        val chatMember = bot.getChatMember(chatId.toLong(), userId.toLong()).first?.body()?.result
            ?: continue
        val topWords = scoreMostDistinguishingWords(wordCounts, universe).keys.take(5)
        val numberedWords = topWords.mapIndexed { i, word -> "${i + 1}. $word" }.joinToString("\n")
        val section = "${chatMember.user.displayName}\n" + numberedWords
        if (section.isNotBlank()) {
            sections += section
        }
    }

    val listText = sections.joinToString("\n\n")
    val replyText = when {
        listText.isBlank() -> "<no data available>"
        else -> "Most distinguishing words:\n\n$listText"
    }
    reply(bot, message, replyText)
}
291+
260292
private fun doDeleteMyDataCommand(bot: Bot, message: Message, chatId: String, senderId: String) {
293+
bot.sendChatAction(chatId.toLong(), ChatAction.TYPING)
261294
wantToDeleteOwnData.getOrPut(chatId) { mutableSetOf() } += senderId
262295
val replyText = "Are you sure you want to delete your Markov chain data in this group? " +
263296
"Say \"yes\" to confirm, or anything else to cancel."
@@ -267,6 +300,7 @@ class MarkovTelegramBot(private val token: String, private val dataPath: String)
267300
private fun doDeleteUserDataCommand(bot: Bot, message: Message, chatId: String, from: User, senderId: String,
268301
entities: List<MessageEntity>) {
269302

303+
bot.sendChatAction(chatId.toLong(), ChatAction.TYPING)
270304
var parseMode: ParseMode? = null
271305
val replyText = if (isAdmin(bot, message.chat, from.id)) {
272306
if (entities.size < 2) {
@@ -299,6 +333,7 @@ class MarkovTelegramBot(private val token: String, private val dataPath: String)
299333
}
300334

301335
private fun doDeleteMessageDataCommand(bot: Bot, message: Message, chatId: String, senderId: String) {
336+
bot.sendChatAction(chatId.toLong(), ChatAction.TYPING)
302337
val replyText = message.replyToMessage?.let { replyToMessage ->
303338
replyToMessage.from?.takeIf { it.id.toString() == senderId }?.let { replyToMessageFrom ->
304339
wantToDeleteMessageData.getOrPut(chatId) { mutableMapOf() }[senderId] = replyToMessage.text ?: ""
@@ -413,9 +448,13 @@ class MarkovTelegramBot(private val token: String, private val dataPath: String)
413448
}
414449

415450
/**
 * Loads every per-user Markov chain stored for the given chat.
 *
 * @param chatId ID of the chat whose personal chains should be read
 * @return one [MarkovChain] per path returned by [getAllPersonalMarkovPaths]
 */
private fun readAllPersonalMarkov(chatId: String): List<MarkovChain> {
    val personalPaths = getAllPersonalMarkovPaths(chatId)
    return personalPaths.map { path -> MarkovChain.read(path) }
}
453+
454+
/**
 * Lists the file paths of every per-user Markov chain stored for the given chat.
 *
 * Files whose name ends with `total.json` are excluded. If the chat directory cannot be listed
 * (`File.listFiles()` returns null, e.g. because the directory does not exist yet), an empty
 * list is returned instead of failing with a NullPointerException.
 *
 * @param chatId ID of the chat whose data directory should be listed
 * @return paths of the personal chain files; empty if there are none
 */
private fun getAllPersonalMarkovPaths(chatId: String): List<String> =
    File(getChatPath(chatId)).listFiles()
        .orEmpty() // listFiles() yields null for a missing or unreadable directory
        .filter { !it.name.endsWith("total.json") }
        .map { it.path }
419458

420459
private fun getMarkovPath(chatId: String, userId: String): String =
421460
Paths.get(getChatPath(chatId), "$userId.json").toString()
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
package clockvapor.telegram.markov
2+
3+
import clockvapor.markov.MarkovChain
4+
5+
/**
 * Total occurrence count of each word recorded in this chain.
 *
 * Every word in the chain's inner maps is sanitized and lower-cased before counting, so variants
 * that differ only in case or in the characters removed by sanitizing share one entry. Words that
 * are blank after sanitizing are ignored.
 */
val MarkovChain.wordCounts: Map<String, Int>
    get() {
        val totals = hashMapOf<String, Int>()
        for ((_, followers) in data) {
            for ((rawWord, occurrences) in followers) {
                val cleaned = rawWord.sanitize()
                if (cleaned.isBlank()) continue
                val key = cleaned.toLowerCase()
                totals[key] = (totals[key] ?: 0) + occurrences
            }
        }
        return totals
    }
18+
19+
/**
 * Scores how strongly each of a user's words distinguishes them from the whole group.
 *
 * A word's score is `count^1.1 / userTotal * (universeTotal / universeCount)`: the user's
 * (slightly boosted) relative frequency of the word, multiplied by how rare the word is across
 * everyone. The rarity factor is computed in floating point; integer division would truncate it
 * and give words of different rarity the same score.
 *
 * @param user word counts of a single user
 * @param universe combined word counts of all users; must contain every word in [user]
 * @return the user's words mapped to their scores, iterating from highest to lowest score
 * @throws NoSuchElementException if a word in [user] is missing from [universe]
 */
fun scoreMostDistinguishingWords(user: Map<String, Int>, universe: Map<String, Int>): Map<String, Double> {
    val userTotal = user.values.sum()
    // Converted to Double up front so the rarity ratio below is not an Int / Int division.
    val universeTotal = universe.values.sum().toDouble()
    return user
        .map { (word, count) ->
            val rarity = universeTotal / universe.getValue(word)
            word to Math.pow(count.toDouble(), 1.1) / userTotal * rarity
        }
        .sortedByDescending { it.second }
        .toMap()
}
28+
29+
/**
 * Merges several word-count maps into one by summing the counts of each word.
 *
 * @param wordCountsCollection the individual word-count maps to combine
 * @return every word that appears in any input map, mapped to its total count
 */
fun computeUniverse(wordCountsCollection: Collection<Map<String, Int>>): Map<String, Int> {
    val totals = hashMapOf<String, Int>()
    wordCountsCollection
        .flatMap { it.entries }
        .forEach { (word, count) -> totals[word] = (totals[word] ?: 0) + count }
    return totals
}
38+
39+
// Character class matching one or more of the punctuation characters stripped from word edges.
private const val punctuation = "[`~!@#$%^&*()\\-_=+\\[\\],<.>/?\\\\|;:\"]+"

// Compiled once so sanitize() does not rebuild the patterns on every call.
private val leadingPunctuation = Regex("^$punctuation")
private val trailingPunctuation = Regex("$punctuation$")

/**
 * Normalizes a word for counting: converts typographic (curly) quotes to their ASCII equivalents,
 * then removes any run of punctuation at the start and at the end of the string.
 *
 * The curly quotes are written as Unicode escapes so they cannot be lost to encoding problems;
 * an empty search string here would insert the replacement throughout the word instead.
 */
private fun String.sanitize(): String =
    replace("\u201C", "\"").replace("\u201D", "\"") // left/right double quotation marks
        .replace("\u2018", "'").replace("\u2019", "'") // left/right single quotation marks
        .replace(leadingPunctuation, "")
        .replace(trailingPunctuation, "")
44+

0 commit comments

Comments
 (0)