Skip to content

Commit 7e30538

Browse files
authored
Merge pull request #1 from infnetdanpro/develop
release 1.0
2 parents f497ff1 + 5390360 commit 7e30538

File tree

10 files changed

+398
-47
lines changed

10 files changed

+398
-47
lines changed

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,3 +4,5 @@ venv2
44
.json
55
.env
66
.DS_Store
7+
*.pyc
8+
__pycache__

Procfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
11
web: gunicorn app:app
2-
worker: celery worker --app=lib.celery.app
2+
worker: celery -A tasks worker --concurrency 1 -E --loglevel=INFO

README.me

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1 +1,22 @@
1+
# Telegram chat stats bot
2+
[https://t.me/chat_stats_analytics_bot](https://t.me/chat_stats_analytics_bot)
13

4+
## How to use?
5+
0. Start the bot (you can use `/start`).
6+
1. Export chat history in `json` format
7+
2. Upload the JSON file to the chat with the bot
8+
3. Wait for the result
9+
4. Download result (html-format) and enjoy!
10+
11+
## What statistics are provided?
12+
1. Most common hashtags
13+
2. List of unique hashtags
14+
3. List of most replied users
15+
16+
17+
## How to add more?
18+
Use the feedback page: [Google Forms](https://forms.gle/oDhqB5GET79VFiat9)
19+
20+
21+
## How to support me?
22+
Donate links: [donate](https://www.donationalerts.com/r/infnetdanpro) or BTC: `bc1qdq8nghl0z3tttlwyt3tuer0pe6z382u2kxt47c`

app.py

Lines changed: 49 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,58 @@
1-
import telebot
1+
import json
22

3-
from flask import Flask
3+
import telebot
4+
from flask import Flask, request
45

56
from config import config
6-
7+
from tasks import run_task
78

89
bot = telebot.TeleBot(config["TELEGRAM_TOKEN"])
910
app = Flask(__name__)
1011

11-
import bot
12+
TOKEN = config["TELEGRAM_TOKEN"]
13+
14+
15+
@bot.message_handler(commands=["start"])
def start(message):
    """Answer the ``/start`` command by asking the user to upload a chat export."""
    greeting = "Upload chat history for parsing data"
    bot.reply_to(message, greeting)
18+
19+
20+
@bot.message_handler(content_types=["document"])
def handle_docs(message):
    """Download an uploaded document, parse it as JSON and queue it for analysis.

    The document is sent by an arbitrary Telegram user, so it is not
    guaranteed to be a JSON chat export. Anything that cannot be decoded as
    UTF-8 JSON, or whose top-level value is not a JSON object, is rejected
    with a reply instead of raising inside the handler or being queued.

    Args:
        message: the incoming Telegram message carrying the document.
    """
    file_info = bot.get_file(message.document.file_id)
    raw_bytes: bytes = bot.download_file(file_info.file_path)

    # The export is parsed entirely in memory; nothing is written to disk.
    try:
        chat_export = json.loads(raw_bytes.decode("utf-8"))
    except ValueError:
        # UnicodeDecodeError and json.JSONDecodeError both derive from
        # ValueError, so this covers binary uploads and malformed JSON alike.
        chat_export = None

    # The Celery task takes ``data_dict: dict``; a JSON list or scalar would
    # only fail later inside the worker, where the user gets no feedback.
    if not isinstance(chat_export, dict):
        bot.reply_to(
            message,
            "Could not read the file. Please upload a chat history exported in JSON format.",
        )
        return

    bot.reply_to(
        message,
        "Your data is analyzing. Bot will message you when the result will be ready.",
    )

    # Hand the parsed export over to the Celery worker.
    run_task.delay(reply_to=message.from_user.id, data_dict=chat_export)
35+
36+
37+
@app.route("/" + TOKEN, methods=["POST"])
def get_message():
    """Receive a webhook POST and pass the decoded update to the bot.

    Returns:
        The tuple ``("!", 200)``.
    """
    payload = request.get_data().decode("utf-8")
    incoming = telebot.types.Update.de_json(payload)
    bot.process_new_updates([incoming])
    return "!", 200
43+
44+
45+
@app.route("/echo", methods=["GET"])
def echo():
    """Reply to ``GET /echo`` with an empty body and status 204."""
    empty_body = ""
    return empty_body, 204
1248

1349

14-
if __name__ == "__main__":
15-
app.run(host="0.0.0.0", port=int(os.environ.get("PORT", 5000)))
50+
@app.route("/")
def webhook():
    """Re-register the bot webhook and return a small HTML page linking to the bot.

    Every request to ``/`` removes the current webhook and sets it again to
    the token-suffixed URL of this deployment.

    Returns:
        A ``(html, 200)`` tuple.
    """
    bot.remove_webhook()
    hook_url = "https://tg-chat-analytics.herokuapp.com/" + TOKEN
    bot.set_webhook(url=hook_url)

    landing_page = (
        '<center><h1><a href="https://t.me/chat_stats_analytics_bot">'
        "https://t.me/chat_stats_analytics_bot</a></h1></center>"
    )
    return landing_page, 200

celeryconfig.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
from config import config
22

3-
CELERY_BROKER_URL = config.get("REDIS_URL", "redis://localhost:6379")
4-
CELERY_RESULT_BACKEND = config.get("REDIS_URL", "redis://localhost:6379")
5-
CELERY_TASK_SERIALIZER = "json"
6-
CELERY_RESULT_SERIALIZER = "json"
3+
broker_url = config.get("REDIS_URL", "redis://localhost:6379")
4+
result_backend = config.get("REDIS_URL", "redis://localhost:6379")
5+
result_serializer = "json"

lib/celery.py

Lines changed: 0 additions & 14 deletions
This file was deleted.

lib/stat.py

Lines changed: 35 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -6,16 +6,23 @@
66
import ujson as json
77

88

9-
class EmptyFilenameException(Exception):
9+
class JSONLogParserEmptyData(Exception):
    """Raised when JSONLogParser is given neither a file name nor a data dict."""
1111

1212

1313
class JSONLogParser:
14-
def __init__(self, file_name: AnyStr):
15-
if not file_name:
16-
raise EmptyFilenameException("You should to put correct filename!")
14+
def __init__(self, file_name: AnyStr = None, data_dict: dict = None):
15+
if not any([file_name, data_dict]):
16+
raise JSONLogParserEmptyData(
17+
"You should put data into class: file_name or data_dict"
18+
)
1719
self.filename: AnyStr = file_name
18-
self.data: Dict = JSONLogParser.parse(file_name)
20+
if self.filename:
21+
# parse file from disk
22+
self.data: Dict = JSONLogParser.parse(file_name)
23+
else:
24+
# get data from dict
25+
self.data = data_dict
1926
self.name: Union[AnyStr, None] = self.data.get("name")
2027
self.type: Union[AnyStr, None] = self.data.get("type")
2128
self.id: Union[int, None] = self.data.get("id")
@@ -144,6 +151,7 @@ def get_user_stats(self):
144151
self.get_user_messages()
145152

146153
return {
154+
"sociable_users": self.sociable_users,
147155
"users": self.users,
148156
"messages": self.messages_by_users,
149157
}
@@ -193,14 +201,20 @@ def clear_text(sentence: str) -> str:
193201
messages.extend(text)
194202
return messages
195203

196-
def get_words_stats(self, words: List[str]):
204+
def get_words_stats(self):
    """Count the normal forms of nouns and verbs among the chat's words.

    Words come from ``self.get_all_words()``. Each one is analysed with
    pymorphy2 and only its first parse is considered; the word's normal form
    is counted when that parse's tag contains ``NOUN`` or ``VERB``.

    The resulting counter is stored on ``self.words_stats``.

    Returns:
        ``{"word_stats": <Counter of normal forms>}``.
    """
    frequencies = Counter()
    tokens = self.get_all_words()
    analyzer = pymorphy2.MorphAnalyzer()
    counted_parts_of_speech = ("NOUN", "VERB")

    # TODO: consider running the analysis in threads.
    for token in tokens:
        best_parse = analyzer.parse(token)[0]
        if any(pos in best_parse.tag for pos in counted_parts_of_speech):
            frequencies[best_parse.normal_form] += 1

    self.words_stats = frequencies
    return {"word_stats": self.words_stats}
204218

205219
# most replied user
206220
def get_most_replied_user(self):
@@ -222,15 +236,23 @@ def get_most_replied_user(self):
222236
message["id"]
223237
]
224238

225-
authors_items = []
239+
authors_items_users = {}
226240
for from_id, values in authors_replied_messages.items():
227-
authors_items.append(
228-
{"from_id": from_id, "count": len(values), "from": users[from_id]}
229-
)
230-
authors_items = sorted(authors_items, key=lambda x: -x["count"])
231-
self.users_replies_stats: List[Dict] = authors_items
241+
authors_items_users[users[from_id]] = len(values)
242+
self.users_replies_stats: List[Dict] = authors_items_users
232243
self.users_replies_data: List[Dict] = authors_replied_messages
233244

245+
return {"most_replies": self.users_replies_stats}
246+
247+
# main function
248+
def generate_stats(self):
    """Run every statistics collector and merge their results into one dict.

    Collectors are called in a fixed order (tags, users, words, replies);
    if two of them return the same key, the later collector's value wins.

    Returns:
        A single dict combining the dicts returned by all collectors.
    """
    collectors = (
        self.tags_stats,
        self.get_user_stats,
        self.get_words_stats,
        self.get_most_replied_user,
    )
    report = {}
    for collect in collectors:
        report.update(collect())
    return report
255+
234256

235257
if __name__ == "__main__":
236258
filename = "result.json"

tasks.py

Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
import os
2+
from io import StringIO
3+
4+
import celery
5+
import telebot
6+
from jinja2 import Environment, FileSystemLoader, ChoiceLoader, select_autoescape
7+
8+
from config import config
9+
from lib.stat import JSONLogParser
10+
11+
app = celery.Celery("celery_app")
12+
app.config_from_object("celeryconfig")
13+
14+
bot = telebot.TeleBot(config["TELEGRAM_TOKEN"])
15+
16+
path = os.path.join(os.getcwd(), "templates")
17+
loader = ChoiceLoader([FileSystemLoader(path)])
18+
env = Environment(
19+
loader=loader,
20+
autoescape=select_autoescape(),
21+
)
22+
23+
24+
def _chart_series(counts, keep=None, limit=None):
    """Turn a ``{label: count}`` mapping into chart labels and values.

    Args:
        counts: mapping of label to numeric count.
        keep: optional predicate on the count; entries for which it returns
            a falsy value are dropped. ``None`` keeps everything.
        limit: optional maximum number of entries to emit. ``None`` means
            no limit.

    Returns:
        ``{"chart_labels": [...], "chart_values": [...]}`` ordered by
        descending count. Labels are wrapped in single quotes.
    """
    # Sorting on the negated count keeps entries with equal counts in their
    # original insertion order (the sort is stable).
    ranked = sorted(counts.items(), key=lambda item: -item[1])
    if keep is not None:
        ranked = [(label, count) for label, count in ranked if keep(count)]
    if limit is not None:
        # Truncate before formatting so only the emitted entries are built.
        ranked = ranked[:limit]
    return {
        "chart_labels": [f"'{label}'" for label, _ in ranked],
        "chart_values": [count for _, count in ranked],
    }


def prepare_template_data(report):
    """Reshape a statistics report into the chart structure passed to the template.

    Args:
        report: dict with the keys ``hashtags_frequency``, ``word_stats`` and
            ``most_replies`` (each a ``{label: count}`` mapping) and
            ``unique_hashtags`` (passed through unchanged).

    Returns:
        A dict with one entry per chart; each entry has ``data``, ``type``
        and ``title``. Hashtag frequencies are emitted in full, word stats
        keep counts above 1 and most-replied users keep counts of at least 1,
        both capped at 250 entries.

    Raises:
        KeyError: if ``report`` lacks one of the expected keys.
    """
    hashtags_frequency = _chart_series(report["hashtags_frequency"])
    word_stats = _chart_series(
        report["word_stats"], keep=lambda count: count > 1, limit=250
    )
    most_replies = _chart_series(
        report["most_replies"], keep=lambda count: count >= 1, limit=250
    )
    unique_hashtags = report["unique_hashtags"]

    return {
        "hashtags_frequency": {
            "data": hashtags_frequency,
            "type": "pie",
            "title": "Hashtag frequency (all)",
        },
        "unique_hashtags": {
            "data": unique_hashtags,
            "type": "cloud",
            "title": "List of unique of hashtags (all)",
        },
        "word_stats": {
            "data": word_stats,
            "type": "pie",
            "title": "Word stats (top 250)",
        },
        "most_replies": {
            "data": most_replies,
            "type": "bar",
            "title": "Most replied users (top 250)",
        },
    }
82+
83+
84+
@app.task
def run_task(reply_to: int, data_dict: dict):
    """Build the HTML statistics report for a chat export and send it to the user.

    Args:
        reply_to: Telegram chat id the finished report is sent to.
        data_dict: parsed chat export, handed to ``JSONLogParser``.
    """
    parser = JSONLogParser(data_dict=data_dict)
    report = parser.generate_stats()

    # Look up the template, then shape the report and render it.
    html = env.get_template("report.html").render(
        chart_data=prepare_template_data(report=report),
        chat_name=parser.name,
    )

    # The in-memory file is given a name before being sent as a document.
    attachment = StringIO(html)
    attachment.name = f"{parser.id}.html"
    bot.send_document(
        chat_id=reply_to, document=attachment, caption="your stats result file"
    )
97+
98+
# bot.send_message(reply_to, 'Hello!!!')

0 commit comments

Comments
 (0)