Added audio downloading, switched to yt-dlp

- enabling and disabling mods using config.json works now - switched to yt-dlp (it supports YouTube without problems) - added audio downloading - changed 'mvd' mod tag to 'md' - added file size to the qualities list - changed thumbnail size to 280x280 at most - .webm and .mhtml extensions are ignored now - worked around encoding errors in utils.get_md5 (they are ignored now, so it is not an actual fix, but it 100% won't break anything)
2025-11-24 00:31:03 +03:00
parent fce7968a72
commit 0bcd47ab4c
3 changed files with 218 additions and 61 deletions
--- a/hubot.py
+++ b/hubot.py
@@ -94,6 +94,11 @@ async def _cb_new_message(event) -> None:
    tasks = []
    # pass the event to all mods interested in it
    for mod_name in _mods:
        # mod is not enabled for this user?
        if mod_name not in sessions[name]['config']['accounts'][name]:
            continue
        if not sessions[name]['config']['accounts'][name][mod_name]:
            continue
        # mod
        mod = _mods[mod_name]
        try:
--- a/mod_video_downloader.py
+++ b/mod_video_downloader.py
@@ -12,6 +12,7 @@ import os
 from telethon import events
 from telethon.tl.types import PeerUser
 from telethon.tl.types import DocumentAttributeVideo
 from telethon.tl.types import DocumentAttributeAudio
 import utils
@@ -32,7 +33,7 @@ def _get_all_qualities_raw(url: str, proxy: bool, timeout: float = 20) -> dict |
    try:
        # prepare arguments
        args = [
-            utils.which('youtube-dl'),
+            utils.which('yt-dlp'),
            '-J',
            url
        ]
@@ -50,22 +51,44 @@ def _get_all_qualities_raw(url: str, proxy: bool, timeout: float = 20) -> dict |
            j = json.loads(txt)
        except:
            return cp.stderr.decode(encoding='ascii')
        if not j:
            return 'yt-dlp has failed to get the list of qualities'
        # dict to return
        res = {}
        title = j['title'] if 'title' in j else 'Без названия'
        author = j['channel'] if 'channel' in j else 'Неизвестен'
        thumbnail = j['thumbnail'] if 'thumbnail' in j else None
        # one format only - convert to many
        if 'formats' not in j:
            j['formats'] = [{
                'format': j['format'],
                'format_id': j['format_id'],
-                'ext': j['ext']
+                'ext': j['ext'],
                'video_ext': j['video_ext'],
                'audio_ext': j['audio_ext']
            }]
            if 'filesize' in j:
                j['formats'][0]['filesize'] = j['filesize']
        # check all formats
        for f in j['formats']:
            obj = {
                'title': title,
                'author': author,
                'url': url,
                'format': f['format_id'],
                'thumbnail': thumbnail,
                'ext': f['ext'],
                'format_name': f['format'],
                'is_audio': f['audio_ext'] != 'none',
                '_added': time.time()
            }
            # filesize exists
            try:
                if 'filesize' in f:
                    obj['filesize'] = int(f['filesize'])
            except:
                pass
            cache_id = utils.get_md5('\n'.join([str(obj[i]) for i in obj if i[0] != '_']))
            _cached_qualities[cache_id] = obj
            res[cache_id] = obj
@@ -93,7 +116,7 @@ def _download_video_raw(url: str, quality_code : str, path: str, proxy: bool, ti
        # prepare arguments
        args = [
-            utils.which('youtube-dl'),
+            utils.which('yt-dlp'),
            '--newline',
            '-f',
            data['format'],
@@ -159,7 +182,7 @@ async def _download_video(url: str, quality_code : str, path: str, proxy: bool,
    except:
        return traceback.format_exc()
-def _get_video_data_raw(path: str) -> dict | None:
+def _get_video_data_raw(path: str, audio: bool) -> dict | None:
    ''' Get video duration, width, height and file size '''
    try:
        # prepare arguments
@@ -168,9 +191,9 @@ def _get_video_data_raw(path: str) -> dict | None:
            '-v',
            'error',
            '-select_streams',
-            'v:0',
+            'a:0' if audio else 'v:0',
            '-show_entries',
-            'stream=width,height,duration',
+            'stream=duration' if audio else 'stream=width,height,duration',
            '-show_entries',
            'format=size',
            '-of',
@@ -188,24 +211,32 @@ def _get_video_data_raw(path: str) -> dict | None:
        try:
            txt = cp.stdout.decode(encoding='ascii').strip()
        except:
-            return None
+            txt = cp.stderr.decode(encoding='ascii').strip()
            return txt
        parts = [i.strip() for i in txt.split('\n') if i.strip()]
-        # result
+        # result for audio
-        return {
+        if audio:
-            'width': int(parts[0]),
+            return {
-            'height': int(parts[1]),
+                'duration': int(float(parts[0])),
-            'duration': int(float(parts[2])),
+                'size': int(parts[1])
-            'size': int(parts[3])
+            }
-        }
+        # result for video
        else:
            return {
                'width': int(parts[0]),
                'height': int(parts[1]),
                'duration': int(float(parts[2])),
                'size': int(parts[3])
            }
    except subprocess.TimeoutExpired:
        return 'ffprobe timed out'
    except:
        return traceback.format_exc()
-async def _get_video_data(path: str) -> dict | None:
+async def _get_video_data(path: str, audio: bool) -> dict | None:
    ''' Async version '''
    try:
-        return await asyncio.to_thread(_get_video_data_raw, path)
+        return await asyncio.to_thread(_get_video_data_raw, path, audio)
    except:
        return traceback.format_exc()
@@ -220,7 +251,7 @@ def _generate_thumb_raw(video: str, timestamp: int, thumb: str) -> bool:
            '-i',
            video,
            '-vf',
-            'thumbnail,scale=\'min(320,iw)\':\'min(320,ih)\':force_original_aspect_ratio=decrease',
+            'thumbnail,scale=\'min(280,iw)\':\'min(280,ih)\':force_original_aspect_ratio=decrease',
            '-frames:v',
            '1',
            thumb
@@ -254,6 +285,50 @@ async def _generate_thumb(video: str, timestamp: int, thumb: str) -> dict | None
    except:
        return traceback.format_exc()
 def _download_thumb_for_tg_raw(url: str, proxy: bool, thumb: str) -> str | None:
    ''' Downloads a thumbnail for Telegram.

    Runs ffmpeg with `url` as the input, scales the picture down so that
    neither side exceeds 280 px (aspect ratio is kept) and writes it to `thumb`.

    url - source image location passed to ffmpeg via '-i'
    proxy - when True, the command is launched through proxychains4
    thumb - path of the thumbnail file to create

    Returns None on success or a string describing the error.
    '''
    try:
        # prepare arguments
        # (utils.which presumably resolves the executable path - confirm in utils)
        args = [
            utils.which('ffmpeg'),
            '-i',
            url,
            '-vf',
            'thumbnail,scale=\'min(280,iw)\':\'min(280,ih)\':force_original_aspect_ratio=decrease',
            thumb
        ]
        if proxy:
            args = [utils.which('proxychains4')] + args
        # start the process
        cp = subprocess.run(
            args,
            capture_output=True,
            timeout=10
        )
        # the created file is what the caller needs, so it defines success
        if os.path.isfile(thumb):
            return None
        # failure: the caller treats any non-empty string as an error, so
        # report it instead of pretending the thumbnail exists.
        # errors='replace' keeps non-ASCII tool output from breaking decoding
        details = cp.stderr.decode(encoding='ascii', errors='replace').strip()
        if not details:
            details = cp.stdout.decode(encoding='ascii', errors='replace').strip()
        return 'ffmpeg has failed to create the thumbnail (exit code %s)\n\n%s' % (cp.returncode, details)
    except subprocess.TimeoutExpired:
        return 'ffmpeg timed out'
    except Exception:
        return traceback.format_exc()
 async def _download_thumb_for_tg(url: str, proxy: bool, thumb: str) -> str | None:
    ''' Async wrapper around _download_thumb_for_tg_raw.

    The raw function is executed via asyncio.to_thread and its result is
    returned unchanged; if awaiting it raises, the formatted traceback is
    returned as the error string instead.
    '''
    try:
        outcome = await asyncio.to_thread(_download_thumb_for_tg_raw, url, proxy, thumb)
    except:
        # same catch-all as the other async wrappers in this module
        outcome = traceback.format_exc()
    return outcome
 async def mod_init(config: dict) -> bool:
    ''' Initialize the mod '''
@@ -284,7 +359,7 @@ def mod_get_mighty() -> bool:
 def mod_get_tags() -> None:
    ''' Get tags used by the mod '''
-    return ['mvd', 'mvdl', 'mvdlp', 'mvdd', 'mvddp']
+    return ['md', 'mvdl', 'mvdlp', 'madl', 'madlp', 'mdd', 'mddp']
 async def mod_new_message(session, event) -> None:
    ''' Handle new message '''
@@ -292,8 +367,8 @@ async def mod_new_message(session, event) -> None:
        # get the message
        msg = event.message
        # not outgoing - do not process
-        if not msg.out:
+        #if not msg.out:
-            return
+        #    return
        # peer must be user
        peer = msg.peer_id
        if type(peer) is not PeerUser:
@@ -307,13 +382,14 @@ async def mod_new_message(session, event) -> None:
        args = args[1:]
        await asyncio.sleep(0.5)
        # help
-        if cmd == 'mvd':
+        if cmd == 'md':
-            response_text = 'mod_video_downloader:'
+            response_text = 'mod_audio_video_downloader:'
            response_text += '\n- mvdl[p] [URL] - get list of all video qualities'
-            response_text += '\n- mvdd[p] [CODE] - download video'
+            response_text += '\n- madl[p] [URL] - get list of all audio qualities'
            response_text += '\n- mdd[p] [CODE] [TITLE] [§ PERFORMER] - download video or audio (for audio only: track title)'
            response_text += '\n\nUse \'p\' letter to utilize proxy'
            await event.reply(message=response_text)
-        # list qualities
+        # list video qualities
        elif cmd.startswith('mvdl'):
            if not args:
                await event.reply(message='No URL!')
@@ -328,28 +404,63 @@ async def mod_new_message(session, event) -> None:
            result = 'Qualities:'
            for qid in qualities:
                data = qualities[qid]
-                result += '\n\n<code>mvdd%s %s</code>' % ('p' if cmd[-1] == 'p' else '', qid)
+                # not a video
-                result += '\n- Format: %s' % data['format']
+                if data['is_audio']:
                    continue
                # extensions to ignore
                if data['ext'] in ['webm', 'mhtml']:
                    continue
                result += '\n\n<code>mdd%s %s</code>' % ('p' if cmd[-1] == 'p' else '', qid)
                result += '\n- Format name: %s' % data['format_name']
                result += '\n- Extension: %s' % data['ext']
                if 'filesize' in data:
                    result += '\n- Filesize: %s KB' % (data['filesize'] / 1000)
            await event.reply(message=result, parse_mode='HTML')
        # list audio qualities
        elif cmd.startswith('madl'):
            if not args:
                await event.reply(message='No URL!')
                return
            await event.reply(message='Checking URL... Please wait, you\'ll be notified if an error happens!')
            qualities = await _get_all_qualities(' '.join(args), cmd[-1] == 'p')
            # error
            if type(qualities) is str:
                await event.reply(message='Error:\n\n%s' % qualities)
                return
            # success
            result = 'Qualities:'
            for qid in qualities:
                data = qualities[qid]
                # not an audio
                if not data['is_audio']:
                    continue
                # extensions to ignore
                if data['ext'] in ['webm']:
                    continue
                result += '\n\n<code>mdd%s %s</code>' % ('p' if cmd[-1] == 'p' else '', qid)
                result += '\n- Format name: %s' % data['format_name']
                result += '\n- Extension: %s' % data['ext']
                if 'filesize' in data:
                    result += '\n- Filesize: %s KB' % (data['filesize'] / 1000)
            await event.reply(message=result, parse_mode='HTML')
        # download
-        elif cmd.startswith('mvdd'):
+        elif cmd.startswith('mdd'):
            if not args:
                await event.reply(message='No CODE!')
                return
            # get the code and check it
-            code = args[-1]
+            code = args[0]
            if code not in _cached_qualities:
                await event.reply(message='This code does not exist. Use \'mvdl[p]\' to obtain the code.')
                return
            # get video data
            data = _cached_qualities[code]
-            await event.reply(message='Downloading the video... Please wait, you\'ll be notified if an error happens!')
+            await event.reply(message='Downloading the media... Please wait, you\'ll be notified if an error happens!')
            res = await _download_video(data['url'], code, 'mvd_temp/%s.bin' % code, cmd[-1] == 'p')
            # res is str - error
            if type(res) is str:
                utils.rm_glob('mvd_temp/%s.*' % code)
-                await event.reply(message='Failed to download video: %s' % res)
+                await event.reply(message='Failed to download media: %s' % res)
                return
            # res is false
            if not res:
@@ -365,41 +476,82 @@ async def mod_new_message(session, event) -> None:
                pass
            except:
                utils.rm_glob('mvd_temp/%s.*' % code)
-                await event.reply(message='Failed to rename downloaded video')
+                await event.reply(message='Failed to rename downloaded media')
                return
            # get video data
-            video_data = await _get_video_data(new_name)
+            video_data = await _get_video_data(new_name, data['is_audio'])
            if type(video_data) is not dict:
                utils.rm_glob('mvd_temp/%s.*' % code)
-                await event.reply(message='Failed to use \'ffprobe\' to get video data')
+                await event.reply(message='Failed to use \'ffprobe\' to get media data:\n\n%s' % video_data)
                return
-            # generate the thumbnail
+            # audio
-            thumb_name = 'mvd_temp/%s.jpg' % code
+            if data['is_audio']:
-            if not await _generate_thumb(new_name, int(video_data['duration'] * 0.75), thumb_name):
+                # assign track title
-                utils.rm_glob('mvd_temp/%s.*' % code)
+                remains = [i.strip() for i in ' '.join(args[1:]).split('§') if i.strip()]
-                await event.reply(message='Failed to generate video thumbnail')
+                title = data['title']
-                return
+                author = data['author']
-            # log
+                thumbnail = data['thumbnail']
-            await event.reply(message='Video is downloaded, thumbnail is generated, uploading it to Telegram...')
+                # use title and author from user message
-            # send file
+                if remains:
-            try:
+                    title = remains[0]
-                await event.client.send_file(
+                    if len(remains) > 1:
-                    entity=peer,
+                        author = remains[-1]
-                    file=new_name,
+                # thumbnail exists, download it and change scale
-                    caption='%s' % data['url'],
+                if thumbnail:
-                    mime_type=utils.get_mime(data['ext']),
+                    thumb_path = 'mvd_temp/%s.jpg' % code
-                    file_size=video_data['size'],
+                    thumbnail = await _download_thumb_for_tg(thumbnail, cmd[-1] == 'p', thumb_path)
-                    thumb=thumb_name,
+                    if thumbnail:
-                    supports_streaming=True,
+                        await event.reply(message='WARNING (audio is still being processed - THIS IS NOT AN ERROR). Failed to download thumbnail.\n\n%s' % thumbnail)
-                    attributes=[DocumentAttributeVideo(
+                        thumbnail = None
-                        duration=video_data['duration'],
+                    else:
-                        w=video_data['width'],
+                        thumbnail = thumb_path
-                        h=video_data['height'],
+                # send file
-                        supports_streaming=True
+                await event.reply(message='Audio is downloaded, uploading it to Telegram...')
-                    )]
+                try:
-                )
+                    await event.client.send_file(
-            except:
+                        entity=peer,
-                await event.reply(message='Failed to upload video to telegram!')
+                        file=new_name,
                        caption='%s' % data['url'],
                        mime_type=utils.get_mime(data['ext']),
                        file_size=video_data['size'],
                        thumb=thumbnail,
                        attributes=[DocumentAttributeAudio(
                            duration=video_data['duration'],
                            title=title,
                            performer=author
                        )]
                    )
                except:
                    await event.reply(message='Failed to upload audio to telegram!\n\n%s' % traceback.format_exc())
            # video
            else:
                # generate the thumbnail
                thumb_name = 'mvd_temp/%s.jpg' % code
                if not await _generate_thumb(new_name, int(video_data['duration'] * 0.75), thumb_name):
                    utils.rm_glob('mvd_temp/%s.*' % code)
                    await event.reply(message='Failed to generate media thumbnail')
                    return
                # log
                await event.reply(message='Media is downloaded, thumbnail is generated, uploading it to Telegram...')
                # send file
                try:
                    await event.client.send_file(
                        entity=peer,
                        file=new_name,
                        caption='%s' % data['url'],
                        mime_type=utils.get_mime(data['ext']),
                        file_size=video_data['size'],
                        thumb=thumb_name,
                        supports_streaming=True,
                        attributes=[DocumentAttributeVideo(
                            duration=video_data['duration'],
                            w=video_data['width'],
                            h=video_data['height'],
                            supports_streaming=True
                        )]
                    )
                except:
                    await event.reply(message='Failed to upload media to telegram!\n\n%s' % traceback.format_exc())
            utils.rm_glob('mvd_temp/%s.*' % code)
    except:
        utils.pex()
--- a/utils.py
+++ b/utils.py
@@ -36,7 +36,7 @@ def get_all_mods() -> list[str]:
 def get_md5(data: str) -> str:
    ''' Returns MD5 for data '''
    md5_hash = hashlib.md5()
-    md5_hash.update(str(data).encode('ascii'))
+    md5_hash.update(str(data).encode('ascii', errors='ignore'))
    return md5_hash.hexdigest()