Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
145 commits
Select commit Hold shift + click to select a range
b98b4fe
Create LICENSE
carloalbertobarbano Mar 1, 2019
8e86740
added dependencies and requirements
carloalbertobarbano Mar 1, 2019
c94ec8d
added src
carloalbertobarbano Mar 1, 2019
35506c5
added travis.yml
carloalbertobarbano Mar 5, 2019
f38cfa0
support for document audio
carloalbertobarbano Mar 5, 2019
4730aa6
updated gitignore
carloalbertobarbano Mar 5, 2019
c222005
removed traslator/__pycache__
carloalbertobarbano Mar 5, 2019
077b103
fixed indentation
carloalbertobarbano Mar 6, 2019
f4e6096
added filesize limit error
carloalbertobarbano Mar 6, 2019
ed3376c
added filter for owner commands
carloalbertobarbano Mar 6, 2019
2587323
refactored filters for commands
carloalbertobarbano Mar 6, 2019
8e1ccf4
added users command for admins
carloalbertobarbano Mar 6, 2019
a96fd83
active cache initialized from db
carloalbertobarbano Mar 6, 2019
4812129
added handlers for new/left chat member
carloalbertobarbano Mar 6, 2019
a402ddb
typo
carloalbertobarbano Mar 6, 2019
34a2edd
moved handlers to separate files
carloalbertobarbano Mar 6, 2019
1fec2df
removed unused vars
carloalbertobarbano Mar 7, 2019
a33c908
added basic split on silence
carloalbertobarbano Mar 7, 2019
7a04f99
added IT localization
carloalbertobarbano Mar 7, 2019
db7ca0c
added space between chunks
carloalbertobarbano Mar 8, 2019
2c62de3
Merge branch 'master' of github.com:charslab/TranscriberBot
carloalbertobarbano Mar 8, 2019
e957a50
dropped mp4 support (gifs)
carloalbertobarbano Mar 8, 2019
9822079
updated strings-IT
carloalbertobarbano Mar 8, 2019
5c78360
fixed string placeholder substitution
carloalbertobarbano Mar 9, 2019
8e3efdc
updated strings
carloalbertobarbano Mar 12, 2019
bfb7c41
added broadcast message
carloalbertobarbano Mar 12, 2019
694276e
removed unnecessary if-else
carloalbertobarbano Mar 12, 2019
1dff50f
updated requirements
carloalbertobarbano Mar 26, 2019
07736e6
removed print
carloalbertobarbano Mar 26, 2019
7abd244
dropped singleton db (test)
carloalbertobarbano Mar 26, 2019
b188c5b
Fix default chat entry missing
carloalbertobarbano May 23, 2019
c004873
default photos_enabled=0
carloalbertobarbano May 24, 2019
7c3c452
fix query
carloalbertobarbano May 24, 2019
8116b37
fix translate command handler
carloalbertobarbano May 26, 2019
8d5fe6b
update version
carloalbertobarbano Jul 7, 2019
75d8f51
Added PTBR Translation
ThigSchuch Jul 7, 2019
764cdbc
Translation to Spanish(es-ES)
vetu11 Jul 8, 2019
2190d70
Merge pull request #1 from ThigSchuch/PTBR-Translation
carloalbertobarbano Jul 8, 2019
a841d44
Merge pull request #3 from vetu11/translation-es-ES
carloalbertobarbano Jul 8, 2019
978e883
Fix strings_pt-BR
carloalbertobarbano Jul 9, 2019
93549b2
Fix None wit.ai response due to segments length
MaanuelMM Mar 13, 2020
1759dcd
Merge pull request #6 from MaanuelMM/patch-1
carloalbertobarbano Mar 14, 2020
2bbaa0f
Fixed python-telegram-bot transition to v12 issues
Davte May 23, 2020
0cd3eaf
Fixed wit.ai-related issue
Davte May 23, 2020
8bb19f8
Print statement removed
Davte May 23, 2020
3adcd4f
Merge pull request #8 from Davte/master
carloalbertobarbano May 23, 2020
06bd384
Merge pull request #9 from charslab/developement
carloalbertobarbano May 24, 2020
c54d4b4
Added Dockerfile and updated the version of python-telegram-bot in th…
stefanodelbosco Jul 29, 2020
f0766a8
Added name of container in dockerRun
stefanodelbosco Jul 29, 2020
8946c70
Added instructions for installation in README file
stefanodelbosco Jul 29, 2020
cf293e9
Update README.md fixed badge
stefanodelbosco Jul 29, 2020
47ba6ef
Updated .gitignore
stefanodelbosco Jul 29, 2020
f75a9b5
Update README.md fixed badge
stefanodelbosco Jul 29, 2020
3e21136
Added virtualenv
stefanodelbosco Aug 4, 2020
36a1c24
Update README.md
stefanodelbosco Aug 4, 2020
1146c16
Merge pull request #10 from stefanodelbosco/master
carloalbertobarbano Oct 1, 2020
64821e8
Update handlers_messages.py
carloalbertobarbano Jan 14, 2021
cb7857d
Organize imports according to PEP8 and remove spaces
turicas Feb 25, 2021
8f85b0c
Use specific logger instead of logging module
turicas Feb 25, 2021
2b49849
Refactor speech.py and add CLI
turicas Feb 25, 2021
027adb8
Add space between each speech chunk
turicas Feb 25, 2021
695be98
Merge pull request #16 from charslab/master
carloalbertobarbano Feb 26, 2021
e4b930d
Merge pull request #17 from charslab/developement
carloalbertobarbano Feb 26, 2021
cb43519
Merge pull request #14 from turicas/enhancements/speech
carloalbertobarbano Feb 26, 2021
f34edfe
Merge pull request #18 from charslab/developement
carloalbertobarbano Feb 26, 2021
bfbad44
Add more ignores to gitignore
turicas Feb 28, 2021
c41db13
Add a Docker ignore list
turicas Feb 28, 2021
ab611f3
Enhance Dockerfile
turicas Feb 28, 2021
adf3fed
Use python-slim instead of buster as base image
turicas Feb 28, 2021
b901f10
Fix telegram.json example
turicas Feb 28, 2021
535ad8c
Use requests.Session instead of separate requests
turicas Feb 28, 2021
bc9c5e7
Merge pull request #19 from turicas/enhancement/optimize-dockerfile
stefanodelbosco Mar 1, 2021
65fc714
Merge pull request #21 from charslab/master
stefanodelbosco Mar 1, 2021
765724b
Merge pull request #22 from charslab/developement
stefanodelbosco Mar 1, 2021
f1334a5
Create Action for docker build and push
stefanodelbosco Mar 1, 2021
bc48c31
Merge pull request #20 from turicas/enhancement/optimize-wit-requests
carloalbertobarbano Mar 2, 2021
17fecc7
Merge pull request #23 from charslab/developement
carloalbertobarbano Mar 2, 2021
c51ea47
Add support for video extensions (+ more audio)
turicas Mar 4, 2021
2d4958b
Refactor if/elif/else
turicas Mar 4, 2021
cb5172b
Fix ellipsis representation
turicas Mar 4, 2021
cdc7226
Added user transcriber in Dockerfile and fix install tesseract
stefanodelbosco Mar 4, 2021
0c16b76
Added html escape in image ocr response
stefanodelbosco Mar 4, 2021
6d21a4b
Merge pull request #25 from stefanodelbosco/master
carloalbertobarbano Mar 5, 2021
29491c7
Restrict audio transcription from video to private chats
turicas Mar 7, 2021
49761f9
Add config for max media voice file size
turicas Mar 7, 2021
fec5948
Merge pull request #24 from turicas/feature/transcribe-video-files
carloalbertobarbano Mar 9, 2021
599865d
Update punctuations mark (ellipses) for pt-BR
AlexDicy Mar 23, 2021
90fe977
Update punctuations mark (ellipses) for it-IT
AlexDicy Mar 23, 2021
0d1ec6d
Update punctuations mark (ellipses)
AlexDicy Mar 23, 2021
9b55182
Fix minor puntuaction typos and translate an untranslated word
AlexDicy Mar 23, 2021
2891687
Merge pull request #30 from AlexDicy/master
carloalbertobarbano Mar 24, 2021
66b3119
Remove translation prefixes on private chats
turicas Mar 26, 2021
349288f
Merge pull request #32 from turicas/enhancement/remove-text-prefix
carloalbertobarbano Mar 29, 2021
5118a85
Improved English strings
dodekaphilist Apr 17, 2021
807c6c2
Added German translation
dodekaphilist Apr 17, 2021
2c9a8bb
Update values/strings.xml
dodekaphilist Apr 17, 2021
3235c63
Update strings.xml
dodekaphilist Apr 17, 2021
77e0316
Update strings_de-DE.xml
dodekaphilist Apr 17, 2021
2b5cb09
Merge pull request #33 from dodekaphilist/master
carloalbertobarbano Jun 19, 2022
fe18ee6
Update docker_build_push.yml
stefanodelbosco Jun 19, 2022
a08edcd
Update docker_build_push.yml
stefanodelbosco Jun 19, 2022
827dfad
Merge branch 'master' into developement
stefanodelbosco Jun 19, 2022
6df3480
Merge pull request #44 from charslab/developement
stefanodelbosco Jun 19, 2022
d1ed337
Added support for anonymous Admins
stefanodelbosco Jun 19, 2022
b88f4f3
Merge pull request #45 from charslab/28-commands-from-anonymous-admin…
stefanodelbosco Jun 19, 2022
3598cb1
split backend
carloalbertobarbano Feb 14, 2025
98c5c00
rename build scripts
carloalbertobarbano Feb 14, 2025
9fc7047
add whisper backend
carloalbertobarbano Feb 14, 2025
0582fbc
remove workflow
carloalbertobarbano Feb 14, 2025
77a1ea4
[wip] refactor to wonda
carloalbertobarbano Feb 15, 2025
8301631
fix indent
carloalbertobarbano Feb 15, 2025
cf1b413
[wip] add wonda
carloalbertobarbano Feb 15, 2025
e6f900f
[wip] upgrade to latest PTB
carloalbertobarbano Feb 15, 2025
1b3efcf
[wip] upgrade to latest PTB
carloalbertobarbano Feb 15, 2025
643ca06
[wip] upgrade to latest PTB
carloalbertobarbano Feb 15, 2025
6a14388
debug print
carloalbertobarbano Feb 15, 2025
a83a440
fix asyncio transcription
carloalbertobarbano Feb 16, 2025
0876cfe
complete support for audio/video
carloalbertobarbano Feb 16, 2025
dc3c0b6
reformat code (PEP)
carloalbertobarbano Feb 16, 2025
81c8434
add logging & chat member handler
carloalbertobarbano Feb 16, 2025
2b2591b
fix async chat_admin filter
carloalbertobarbano Feb 16, 2025
284992a
optimize db usage
carloalbertobarbano Feb 16, 2025
b52f30b
fix async chat_admin filter
carloalbertobarbano Feb 16, 2025
2edc4ed
add chat member handler
carloalbertobarbano Feb 16, 2025
634cabe
add todo
carloalbertobarbano Feb 16, 2025
cc761a0
add anonymous admin
carloalbertobarbano Feb 16, 2025
4c6ee90
add sentry config
carloalbertobarbano Feb 16, 2025
af2d558
add sentry
carloalbertobarbano Feb 16, 2025
6c54c78
check if no extension is present
carloalbertobarbano Feb 16, 2025
6de910e
add logging
carloalbertobarbano Feb 16, 2025
72b0170
handle animations
carloalbertobarbano Feb 16, 2025
4576f54
add photo handlers
carloalbertobarbano Feb 18, 2025
986958d
always enable photos for private chats
carloalbertobarbano Feb 18, 2025
8e05bda
add teseract_path in config
carloalbertobarbano Feb 19, 2025
3cc02a2
update base docker image
carloalbertobarbano Feb 19, 2025
bf61203
add build action
carloalbertobarbano Feb 19, 2025
7fa9337
update run.sh
carloalbertobarbano Feb 19, 2025
59fbc21
fix prop key
carloalbertobarbano Feb 19, 2025
964f16e
remove prints
carloalbertobarbano Feb 19, 2025
b8cf408
log chat_id for TypeError in get_chat_voice_enabled
carloalbertobarbano Feb 19, 2025
ee62b74
add audio queue
carloalbertobarbano Feb 19, 2025
bd45046
improve stats command
carloalbertobarbano Feb 19, 2025
6a00724
use exc_info in logging
carloalbertobarbano Feb 20, 2025
9150fd0
Merge branch 'developement' into ptb-async
carloalbertobarbano Feb 20, 2025
4334fbd
remove old docker scripts
carloalbertobarbano Feb 20, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 41 additions & 0 deletions .github/workflows/build_image.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# Build the Docker image and push it to the GitHub Container Registry
# (ghcr.io) on every push to one of the branches listed below.
name: DockerBuildAndPush

on:
  push:
    branches:
      - master
      - developement
      - ptb-async

env:
  IMAGE_NAME: transcriberbot

jobs:
  push:
    runs-on: ubuntu-latest
    if: github.event_name == 'push'

    # The GITHUB_TOKEN must be allowed to write packages to push to ghcr.io;
    # read access to the contents is enough for the checkout.
    permissions:
      contents: read
      packages: write

    steps:
      - uses: actions/checkout@v4

      - name: Login to ghcr registry
        # Log in as the user that triggered the workflow; the token is piped
        # on stdin so it never appears on the command line.
        run: echo "${{ secrets.GITHUB_TOKEN }}" | docker login ghcr.io -u ${{ github.actor }} --password-stdin

      - name: Build image
        run: docker build . --file Dockerfile --tag $IMAGE_NAME

      - name: Push image
        run: |
          IMAGE_ID=ghcr.io/${{ github.repository_owner }}/$IMAGE_NAME
          # Change all uppercase to lowercase
          IMAGE_ID=$(echo $IMAGE_ID | tr '[A-Z]' '[a-z]')
          # Strip git ref prefix from version
          VERSION=$(echo "${{ github.ref }}" | sed -e 's,.*/\(.*\),\1,')
          # Strip "v" prefix from tag name
          [[ "${{ github.ref }}" == "refs/tags/"* ]] && VERSION=$(echo $VERSION | sed -e 's/^v//')
          # Use Docker `latest` tag convention
          [ "$VERSION" == "master" ] && VERSION=latest
          echo IMAGE_ID=$IMAGE_ID
          echo VERSION=$VERSION
          docker tag $IMAGE_NAME $IMAGE_ID:$VERSION
          docker push $IMAGE_ID:$VERSION
1 change: 0 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
# TranscriberBot-specific ignores
config/
media/

# Generic data-related ignores
Expand Down
1 change: 1 addition & 0 deletions .python-version
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
transcriber-bot-wonda
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM python:3.9-slim
FROM python:3.12.0-slim

# Set global configs
WORKDIR /
Expand Down
File renamed without changes.
12 changes: 12 additions & 0 deletions config/app.json
Original file line number Diff line number Diff line change
Expand Up @@ -49,12 +49,24 @@
"webm"
],

"ocr": {
"tesseract_path": "/usr/share/tesseract-ocr/5/tessdata/"
},

"antiflood": {
"age_threshold": 10,
"flood_ratio": 2,
"max_flood_ratio": 6,
"time_threshold_warning": 4,
"time_threshold_flood": 5,
"timeout": 10
},

"whisper": {
"api_endpoint": "http://127.0.0.1:8000"
},

"logging": {
"level": "APP"
}
}
3 changes: 3 additions & 0 deletions config/sentry.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{
"dsn": "xxx"
}
8 changes: 0 additions & 8 deletions dockerRun.sh

This file was deleted.

3 changes: 2 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
python-telegram-bot==12.3.0
python-telegram-bot
coloredlogs
pillow
watchdog
tesserocr
pydub
zbarlight
requests
sentry-sdk
15 changes: 15 additions & 0 deletions run.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
#!/bin/sh
# Pull the "ptb-async" TranscriberBot image from ghcr.io and start it as a
# detached container named "transcriberbot-async".
#
# - The data, config, values and media directories of the current working
#   directory are bind-mounted to /data, /config, /values and /media.
# - The container is limited to 4 CPUs and 3000 MB of memory.
# - It runs with the invoking user's uid and gid 1337, and is restarted
#   automatically unless it has been stopped explicitly.

docker pull ghcr.io/charslab/transcriberbot:ptb-async
docker run \
-e LC_ALL=C \
-d --restart unless-stopped \
--name "transcriberbot-async" \
-v "$(pwd)"/data:/data \
-v "$(pwd)"/config:/config \
-v "$(pwd)"/values:/values \
-v "$(pwd)"/media:/media \
--cpus=4.0 \
--memory=3000m \
-u "$(id -u):1337" \
ghcr.io/charslab/transcriberbot:ptb-async
2 changes: 1 addition & 1 deletion src/antiflood/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@
from antiflood.antiflood import register_flood_warning_callback
from antiflood.antiflood import register_flood_started_callback
from antiflood.antiflood import register_flood_ended_callback
from antiflood.antiflood import init
from antiflood.antiflood import init
131 changes: 67 additions & 64 deletions src/antiflood/antiflood.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,11 @@

logger = logging.getLogger(__name__)

flood_ratio = 2 # messages/seconds
flood_ratio = 2 # messages/seconds
max_flood_ratio = 10
time_threshold_warning = 5 # ratio > flood_ratio for {time_threshold_warning} seconds
time_threshold_flood = 10 # ratio > flood_ratio for {time_threshold_flood} seconds
timeout = 4 # flood ends after ratio < flood_ratio for {timeout} seconds
time_threshold_flood = 10 # ratio > flood_ratio for {time_threshold_flood} seconds
timeout = 4 # flood ends after ratio < flood_ratio for {timeout} seconds

callback_flood_warning = None
callback_flood_started = None
Expand All @@ -20,74 +20,77 @@
# chat_id -> (level, ratio, msg_num, duration, last_update)
stats = {}

def register_flood_warning_callback(callback):
    """Store ``callback`` as the module-level flood-warning handler.

    ``on_chat_msg_received`` calls the stored handler with the chat id when a
    chat reaches the warning level. Passing ``None`` clears the handler.
    """
    global callback_flood_warning
    callback_flood_warning = callback

def register_flood_started_callback(callback):
    """Store ``callback`` as the module-level flood-started handler.

    ``on_chat_msg_received`` calls the stored handler with the chat id when a
    chat is raised to the flood level. A falsy value disables the handler.
    """
    global callback_flood_started
    callback_flood_started = callback

def register_flood_ended_callback(callback):
    """Store ``callback`` as the module-level flood-ended handler.

    ``on_chat_msg_received`` calls the stored handler with the chat id when it
    resets a chat's counters. A falsy value disables the handler.
    """
    global callback_flood_ended
    callback_flood_ended = callback

def init():
    """Load the antiflood thresholds from the app configuration.

    Reads the ``antiflood`` section of the ``app`` config once, so every
    threshold comes from the same snapshot, overwrites the module-level
    defaults with its values and logs the values now in effect.

    Raises:
        KeyError: if the ``antiflood`` section or one of its keys is missing.
    """
    global flood_ratio, max_flood_ratio, time_threshold_warning, time_threshold_flood, timeout

    antiflood_cfg = config.get_config_prop("app")["antiflood"]

    flood_ratio = antiflood_cfg["flood_ratio"]
    max_flood_ratio = antiflood_cfg["max_flood_ratio"]
    time_threshold_warning = antiflood_cfg["time_threshold_warning"]
    time_threshold_flood = antiflood_cfg["time_threshold_flood"]
    timeout = antiflood_cfg["timeout"]

    logger.info("Ratio: %d", flood_ratio)
    logger.info("Max flood ratio: %d", max_flood_ratio)
    logger.info("Thr warning: %d", time_threshold_warning)
    logger.info("Thr flood: %d", time_threshold_flood)
    logger.info("Timeout: %d", timeout)

def on_chat_msg_received(chat_id):
global flood_ratio, time_threshold_warning, time_threshold_flood, timeout
global callback_flood_warning, callback_flood_started, callback_flood_ended

curr_time = time.time()

if chat_id not in stats:
stats[chat_id] = [LEVEL_NORMAL, 1.0, 1, 0.0, curr_time]
def register_flood_warning_callback(callback):
    """Store ``callback`` as the module-level flood-warning handler.

    ``on_chat_msg_received`` calls the stored handler with the chat id when a
    chat reaches the warning level. Passing ``None`` clears the handler.
    """
    global callback_flood_warning
    callback_flood_warning = callback

else:
level, ratio, msg_num, duration, last_update = stats[chat_id]
updated_duration = duration + curr_time - last_update
msg_num += 1
curr_ratio = msg_num / updated_duration

if curr_ratio < flood_ratio and updated_duration > timeout:
curr_ratio, updated_duration, msg_num = 0, 0, 0
level = LEVEL_NORMAL
if callback_flood_ended:
callback_flood_ended(chat_id)
def register_flood_started_callback(callback):
    """Store ``callback`` as the module-level flood-started handler.

    ``on_chat_msg_received`` calls the stored handler with the chat id when a
    chat is raised to the flood level. A falsy value disables the handler.
    """
    global callback_flood_started
    callback_flood_started = callback

elif updated_duration > 1 and curr_ratio > max_flood_ratio and level < LEVEL_FLOOD:
level = LEVEL_FLOOD
logger.warning("Flood ratio for chat %d is over the top", chat_id)
if callback_flood_started:
callback_flood_started(chat_id)

elif curr_ratio > flood_ratio:
if updated_duration >= time_threshold_flood and level < LEVEL_FLOOD:
logger.warning("Flood detected for chat %d", chat_id)
level = LEVEL_FLOOD
if callback_flood_started:
callback_flood_started(chat_id)
def register_flood_ended_callback(callback):
    """Store ``callback`` as the module-level flood-ended handler.

    ``on_chat_msg_received`` calls the stored handler with the chat id when it
    resets a chat's counters. A falsy value disables the handler.
    """
    global callback_flood_ended
    callback_flood_ended = callback

elif updated_duration >= time_threshold_warning and level < LEVEL_WARNING:
logger.info("Potential flood for chat %d", chat_id)
level = LEVEL_WARNING
if callback_flood_warning is not None:
callback_flood_warning(chat_id)

stats[chat_id] = (level, curr_ratio, msg_num, updated_duration, curr_time)
def init():
global flood_ratio, max_flood_ratio, time_threshold_warning, time_threshold_flood, timeout
flood_ratio = config.get_config_prop("app")["antiflood"]["flood_ratio"]
max_flood_ratio = config.get_config_prop("app")["antiflood"]["max_flood_ratio"]
time_threshold_warning = config.get_config_prop("app")["antiflood"]["time_threshold_warning"]
time_threshold_flood = config.get_config_prop("app")["antiflood"]["time_threshold_flood"]
timeout = config.get_config_prop("app")["antiflood"]["timeout"]

logger.info("stats[{}]: {}".format(chat_id, stats[chat_id]))
logger.info("Ratio: %d", flood_ratio)
logger.info("Max flood ratio: %d", max_flood_ratio)
logger.info("Thr warning: %d", time_threshold_warning)
logger.info("Thr flood: %d", time_threshold_flood)
logger.info("Timeout: %d", timeout)


def on_chat_msg_received(chat_id):
    """Update the flood statistics of a chat for one incoming message.

    The first message seen for ``chat_id`` only creates its ``stats`` entry.
    Every later message recomputes the message rate (messages divided by the
    seconds accumulated since the entry was created or last reset) and then
    applies the first matching rule:

    * rate below ``flood_ratio`` for longer than ``timeout`` seconds: the
      counters are reset, the level goes back to ``LEVEL_NORMAL`` and
      ``callback_flood_ended`` is called (whatever the previous level was);
    * rate above ``max_flood_ratio`` after more than one second: the level is
      raised to ``LEVEL_FLOOD`` and ``callback_flood_started`` is called;
    * rate above ``flood_ratio``: the level is raised to ``LEVEL_FLOOD`` or
      ``LEVEL_WARNING`` once the accumulated time reaches the matching
      threshold, and the matching callback is called.

    Args:
        chat_id: identifier of the chat the message was received in.
    """
    curr_time = time.time()

    if chat_id not in stats:
        stats[chat_id] = [LEVEL_NORMAL, 1.0, 1, 0.0, curr_time]

    else:
        level, _prev_ratio, msg_num, duration, last_update = stats[chat_id]
        # Clamp at zero: the wall clock may step backwards between messages.
        updated_duration = max(duration + curr_time - last_update, 0.0)
        msg_num += 1

        # Two messages can carry the same timestamp; the rate is then
        # undefined, so treat it as infinite instead of dividing by zero.
        # The duration guards below keep such a burst from escalating.
        if updated_duration > 0:
            curr_ratio = msg_num / updated_duration
        else:
            curr_ratio = float("inf")

        if curr_ratio < flood_ratio and updated_duration > timeout:
            curr_ratio, updated_duration, msg_num = 0, 0, 0
            level = LEVEL_NORMAL
            if callback_flood_ended:
                callback_flood_ended(chat_id)

        elif updated_duration > 1 and curr_ratio > max_flood_ratio and level < LEVEL_FLOOD:
            level = LEVEL_FLOOD
            logger.warning("Flood ratio for chat %d is over the top", chat_id)
            if callback_flood_started:
                callback_flood_started(chat_id)

        elif curr_ratio > flood_ratio:
            # NOTE(review): both threshold checks are assumed to be nested
            # under this branch - confirm against the original indentation.
            if updated_duration >= time_threshold_flood and level < LEVEL_FLOOD:
                logger.warning("Flood detected for chat %d", chat_id)
                level = LEVEL_FLOOD
                if callback_flood_started:
                    callback_flood_started(chat_id)

            elif updated_duration >= time_threshold_warning and level < LEVEL_WARNING:
                logger.info("Potential flood for chat %d", chat_id)
                level = LEVEL_WARNING
                if callback_flood_warning is not None:
                    callback_flood_warning(chat_id)

        stats[chat_id] = (level, curr_ratio, msg_num, updated_duration, curr_time)

    logger.info("stats[{}]: {}".format(chat_id, stats[chat_id]))
2 changes: 1 addition & 1 deletion src/audiotools/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
from audiotools.speech import transcribe
from audiotools.speech import transcribe
Loading