Added audio downloading, switched to yt-dlp

- enabling and disabling mods using config.json works now - switched to yt-dlp (it supports YouTube without problems) - added audio downloading - changed 'mvd' mod tag to 'md' - added filesize to the qualities list - changed thumbnail size to 280x280 at most - .webm and .mhtml extensions are ignored now - fixed encoding errors in utils.get_md5 (the errors are ignored now, so it is not an actual fix, but it 100% won't break anything)
2025-11-24 00:31:03 +03:00
parent fce7968a72
commit 0bcd47ab4c
3 changed files with 218 additions and 61 deletions
--- a/hubot.py
+++ b/hubot.py
@@ -94,6 +94,11 @@ async def _cb_new_message(event) -> None:
    tasks = []
    # pass the event to all mods interested in it
    for mod_name in _mods:
+        # mod is not enabled for this user?
+        if mod_name not in sessions[name]['config']['accounts'][name]:
+            continue
+        if not sessions[name]['config']['accounts'][name][mod_name]:
+            continue
        # mod
        mod = _mods[mod_name]
        try:
--- a/mod_video_downloader.py
+++ b/mod_video_downloader.py
@@ -12,6 +12,7 @@ import os
 from telethon import events
 from telethon.tl.types import PeerUser
 from telethon.tl.types import DocumentAttributeVideo
+from telethon.tl.types import DocumentAttributeAudio

 import utils

@@ -32,7 +33,7 @@ def _get_all_qualities_raw(url: str, proxy: bool, timeout: float = 20) -> dict |
    try:
        # prepare arguments
        args = [
-            utils.which('youtube-dl'),
+            utils.which('yt-dlp'),
            '-J',
            url
        ]
@@ -50,22 +51,44 @@ def _get_all_qualities_raw(url: str, proxy: bool, timeout: float = 20) -> dict |
            j = json.loads(txt)
        except:
            return cp.stderr.decode(encoding='ascii')
+        if not j:
+            return 'yt-dlp has failed to get the list of qualities'
+
        # dict to return
        res = {}
+        title = j['title'] if 'title' in j else 'Без названия'
+        author = j['channel'] if 'channel' in j else 'Неизвестен'
+        thumbnail = j['thumbnail'] if 'thumbnail' in j else None
        # one format only - convert to many
        if 'formats' not in j:
            j['formats'] = [{
+                'format': j['format'],
                'format_id': j['format_id'],
-                'ext': j['ext']
+                'ext': j['ext'],
+                'video_ext': j['video_ext'],
+                'audio_ext': j['audio_ext']
            }]
+            if 'filesize' in j:
+                j['formats'][0]['filesize'] = j['filesize']
        # check all formats
        for f in j['formats']:
            obj = {
+                'title': title,
+                'author': author,
                'url': url,
                'format': f['format_id'],
+                'thumbnail': thumbnail,
                'ext': f['ext'],
+                'format_name': f['format'],
+                'is_audio': f['audio_ext'] != 'none',
                '_added': time.time()
            }
+            # filesize exists
+            try:
+                if 'filesize' in f:
+                    obj['filesize'] = int(f['filesize'])
+            except:
+                pass
            cache_id = utils.get_md5('\n'.join([str(obj[i]) for i in obj if i[0] != '_']))
            _cached_qualities[cache_id] = obj
            res[cache_id] = obj
@@ -93,7 +116,7 @@ def _download_video_raw(url: str, quality_code : str, path: str, proxy: bool, ti

        # prepare arguments
        args = [
-            utils.which('youtube-dl'),
+            utils.which('yt-dlp'),
            '--newline',
            '-f',
            data['format'],
@@ -159,7 +182,7 @@ async def _download_video(url: str, quality_code : str, path: str, proxy: bool,
    except:
        return traceback.format_exc()

-def _get_video_data_raw(path: str) -> dict | None:
+def _get_video_data_raw(path: str, audio: bool) -> dict | None:
    ''' Get video duration, width, height and file size '''
    try:
        # prepare arguments
@@ -168,9 +191,9 @@ def _get_video_data_raw(path: str) -> dict | None:
            '-v',
            'error',
            '-select_streams',
-            'v:0',
+            'a:0' if audio else 'v:0',
            '-show_entries',
-            'stream=width,height,duration',
+            'stream=duration' if audio else 'stream=width,height,duration',
            '-show_entries',
            'format=size',
            '-of',
@@ -188,24 +211,32 @@ def _get_video_data_raw(path: str) -> dict | None:
        try:
            txt = cp.stdout.decode(encoding='ascii').strip()
        except:
-            return None
+            txt = cp.stderr.decode(encoding='ascii').strip()
+            return txt
        parts = [i.strip() for i in txt.split('\n') if i.strip()]
-        # result
-        return {
-            'width': int(parts[0]),
-            'height': int(parts[1]),
-            'duration': int(float(parts[2])),
-            'size': int(parts[3])
-        }
+        # result for audio
+        if audio:
+            return {
+                'duration': int(float(parts[0])),
+                'size': int(parts[1])
+            }
+        # result for video
+        else:
+            return {
+                'width': int(parts[0]),
+                'height': int(parts[1]),
+                'duration': int(float(parts[2])),
+                'size': int(parts[3])
+            }
    except subprocess.TimeoutExpired:
        return 'ffprobe timed out'
    except:
        return traceback.format_exc()
    
-async def _get_video_data(path: str) -> dict | None:
+async def _get_video_data(path: str, audio: bool) -> dict | None:
    ''' Async version '''
    try:
-        return await asyncio.to_thread(_get_video_data_raw, path)
+        return await asyncio.to_thread(_get_video_data_raw, path, audio)
    except:
        return traceback.format_exc()

@@ -220,7 +251,7 @@ def _generate_thumb_raw(video: str, timestamp: int, thumb: str) -> bool:
            '-i',
            video,
            '-vf',
-            'thumbnail,scale=\'min(320,iw)\':\'min(320,ih)\':force_original_aspect_ratio=decrease',
+            'thumbnail,scale=\'min(280,iw)\':\'min(280,ih)\':force_original_aspect_ratio=decrease',
            '-frames:v',
            '1',
            thumb
@@ -254,6 +285,50 @@ async def _generate_thumb(video: str, timestamp: int, thumb: str) -> dict | None
    except:
        return traceback.format_exc()
    
+def _download_thumb_for_tg_raw(url: str, proxy: bool, thumb: str) -> str | None:
+    ''' Downloads a thumbnail for Telegram '''
+    try:
+        # prepare arguments
+        args = [
+            utils.which('ffmpeg'),
+            '-i',
+            url,
+            '-vf',
+            'thumbnail,scale=\'min(280,iw)\':\'min(280,ih)\':force_original_aspect_ratio=decrease',
+            thumb
+        ]
+        if proxy:
+            args = [utils.which('proxychains4')] + args
+        # start the process
+        cp = subprocess.run(
+            args,
+            capture_output=True,
+            timeout=10
+        )
+        # check the result
+        txt = None
+        try:
+            txt = cp.stdout.decode(encoding='ascii').strip()
+            txt = cp.stderr.decode(encoding='ascii').strip()
+        except:
+            pass
+        parts = [i.strip() for i in txt.split('\n') if i.strip()]
+        # result
+        if not os.path.isfile(thumb):
+            print('File does not exist somewhy')
+        return None
+    except subprocess.TimeoutExpired:
+        return 'ffprobe timed out'
+    except:
+        return traceback.format_exc()
+    return 'Wrong'
+
+async def _download_thumb_for_tg(url: str, proxy: bool, thumb: str) -> str | None:
+    ''' Async version '''
+    try:
+        return await asyncio.to_thread(_download_thumb_for_tg_raw, url, proxy, thumb)
+    except:
+        return traceback.format_exc()

 async def mod_init(config: dict) -> bool:
    ''' Initialize the mod '''
@@ -284,7 +359,7 @@ def mod_get_mighty() -> bool:

 def mod_get_tags() -> None:
    ''' Get tags used by the mod '''
-    return ['mvd', 'mvdl', 'mvdlp', 'mvdd', 'mvddp']
+    return ['md', 'mvdl', 'mvdlp', 'madl', 'madlp', 'mdd', 'mddp']

 async def mod_new_message(session, event) -> None:
    ''' Handle new message '''
@@ -292,8 +367,8 @@ async def mod_new_message(session, event) -> None:
        # get the message
        msg = event.message
        # not outgoing - do not process
-        if not msg.out:
-            return
+        #if not msg.out:
+        #    return
        # peer must be user
        peer = msg.peer_id
        if type(peer) is not PeerUser:
@@ -307,13 +382,14 @@ async def mod_new_message(session, event) -> None:
        args = args[1:]
        await asyncio.sleep(0.5)
        # help
-        if cmd == 'mvd':
-            response_text = 'mod_video_downloader:'
+        if cmd == 'md':
+            response_text = 'mod_audio_video_downloader:'
            response_text += '\n- mvdl[p] [URL] - get list of all video qualities'
-            response_text += '\n- mvdd[p] [CODE] - download video'
+            response_text += '\n- madl[p] [URL] - get list of all audio qualities'
+            response_text += '\n- mdd[p] [CODE] [TITLE] [§ PERFORMER] - download video or audio (for audio only: track title)'
            response_text += '\n\nUse \'p\' letter to utilize proxy'
            await event.reply(message=response_text)
-        # list qualities
+        # list video qualities
        elif cmd.startswith('mvdl'):
            if not args:
                await event.reply(message='No URL!')
@@ -328,28 +404,63 @@ async def mod_new_message(session, event) -> None:
            result = 'Qualities:'
            for qid in qualities:
                data = qualities[qid]
-                result += '\n\n<code>mvdd%s %s</code>' % ('p' if cmd[-1] == 'p' else '', qid)
-                result += '\n- Format: %s' % data['format']
+                # not a video
+                if data['is_audio']:
+                    continue
+                # extensions to ignore
+                if data['ext'] in ['webm', 'mhtml']:
+                    continue
+                result += '\n\n<code>mdd%s %s</code>' % ('p' if cmd[-1] == 'p' else '', qid)
+                result += '\n- Format name: %s' % data['format_name']
                result += '\n- Extension: %s' % data['ext']
+                if 'filesize' in data:
+                    result += '\n- Filesize: %s KB' % (data['filesize'] / 1000)
+            await event.reply(message=result, parse_mode='HTML')
+        # list audio qualities
+        elif cmd.startswith('madl'):
+            if not args:
+                await event.reply(message='No URL!')
+                return
+            await event.reply(message='Checking URL... Please wait, you\'ll be notified if an error happens!')
+            qualities = await _get_all_qualities(' '.join(args), cmd[-1] == 'p')
+            # error
+            if type(qualities) is str:
+                await event.reply(message='Error:\n\n%s' % qualities)
+                return
+            # success
+            result = 'Qualities:'
+            for qid in qualities:
+                data = qualities[qid]
+                # not an audio
+                if not data['is_audio']:
+                    continue
+                # extensions to ignore
+                if data['ext'] in ['webm']:
+                    continue
+                result += '\n\n<code>mdd%s %s</code>' % ('p' if cmd[-1] == 'p' else '', qid)
+                result += '\n- Format name: %s' % data['format_name']
+                result += '\n- Extension: %s' % data['ext']
+                if 'filesize' in data:
+                    result += '\n- Filesize: %s KB' % (data['filesize'] / 1000)
            await event.reply(message=result, parse_mode='HTML')
        # download
-        elif cmd.startswith('mvdd'):
+        elif cmd.startswith('mdd'):
            if not args:
                await event.reply(message='No CODE!')
                return
            # get the code and check it
-            code = args[-1]
+            code = args[0]
            if code not in _cached_qualities:
                await event.reply(message='This code does not exist. Use \'mvdl[p]\' to obtain the code.')
                return
            # get video data
            data = _cached_qualities[code]
-            await event.reply(message='Downloading the video... Please wait, you\'ll be notified if an error happens!')
+            await event.reply(message='Downloading the media... Please wait, you\'ll be notified if an error happens!')
            res = await _download_video(data['url'], code, 'mvd_temp/%s.bin' % code, cmd[-1] == 'p')
            # res is str - error
            if type(res) is str:
                utils.rm_glob('mvd_temp/%s.*' % code)
-                await event.reply(message='Failed to download video: %s' % res)
+                await event.reply(message='Failed to download media: %s' % res)
                return
            # res is false
            if not res:
@@ -365,41 +476,82 @@ async def mod_new_message(session, event) -> None:
                pass
            except:
                utils.rm_glob('mvd_temp/%s.*' % code)
-                await event.reply(message='Failed to rename downloaded video')
+                await event.reply(message='Failed to rename downloaded media')
                return
            # get video data
-            video_data = await _get_video_data(new_name)
+            video_data = await _get_video_data(new_name, data['is_audio'])
            if type(video_data) is not dict:
                utils.rm_glob('mvd_temp/%s.*' % code)
-                await event.reply(message='Failed to use \'ffprobe\' to get video data')
+                await event.reply(message='Failed to use \'ffprobe\' to get media data:\n\n%s' % video_data)
                return
-            # generate the thumbnail
-            thumb_name = 'mvd_temp/%s.jpg' % code
-            if not await _generate_thumb(new_name, int(video_data['duration'] * 0.75), thumb_name):
-                utils.rm_glob('mvd_temp/%s.*' % code)
-                await event.reply(message='Failed to generate video thumbnail')
-                return
-            # log
-            await event.reply(message='Video is downloaded, thumbnail is generated, uploading it to Telegram...')
-            # send file
-            try:
-                await event.client.send_file(
-                    entity=peer,
-                    file=new_name,
-                    caption='%s' % data['url'],
-                    mime_type=utils.get_mime(data['ext']),
-                    file_size=video_data['size'],
-                    thumb=thumb_name,
-                    supports_streaming=True,
-                    attributes=[DocumentAttributeVideo(
-                        duration=video_data['duration'],
-                        w=video_data['width'],
-                        h=video_data['height'],
-                        supports_streaming=True
-                    )]
-                )
-            except:
-                await event.reply(message='Failed to upload video to telegram!')
+            # audio
+            if data['is_audio']:
+                # assign track title
+                remains = [i.strip() for i in ' '.join(args[1:]).split('§') if i.strip()]
+                title = data['title']
+                author = data['author']
+                thumbnail = data['thumbnail']
+                # use title and author from user message
+                if remains:
+                    title = remains[0]
+                    if len(remains) > 1:
+                        author = remains[-1]
+                # thumbnail exists, download it and change scale
+                if thumbnail:
+                    thumb_path = 'mvd_temp/%s.jpg' % code
+                    thumbnail = await _download_thumb_for_tg(thumbnail, cmd[-1] == 'p', thumb_path)
+                    if thumbnail:
+                        await event.reply(message='WARNING (audio is still being processed - THIS IS NOT AN ERROR). Failed to download thumbnail.\n\n%s' % thumbnail)
+                        thumbnail = None
+                    else:
+                        thumbnail = thumb_path
+                # send file
+                await event.reply(message='Audio is downloaded, uploading it to Telegram...')
+                try:
+                    await event.client.send_file(
+                        entity=peer,
+                        file=new_name,
+                        caption='%s' % data['url'],
+                        mime_type=utils.get_mime(data['ext']),
+                        file_size=video_data['size'],
+                        thumb=thumbnail,
+                        attributes=[DocumentAttributeAudio(
+                            duration=video_data['duration'],
+                            title=title,
+                            performer=author
+                        )]
+                    )
+                except:
+                    await event.reply(message='Failed to upload audio to telegram!\n\n%s' % traceback.format_exc())
+            # video
+            else:
+                # generate the thumbnail
+                thumb_name = 'mvd_temp/%s.jpg' % code
+                if not await _generate_thumb(new_name, int(video_data['duration'] * 0.75), thumb_name):
+                    utils.rm_glob('mvd_temp/%s.*' % code)
+                    await event.reply(message='Failed to generate media thumbnail')
+                    return
+                # log
+                await event.reply(message='Media is downloaded, thumbnail is generated, uploading it to Telegram...')
+                # send file
+                try:
+                    await event.client.send_file(
+                        entity=peer,
+                        file=new_name,
+                        caption='%s' % data['url'],
+                        mime_type=utils.get_mime(data['ext']),
+                        file_size=video_data['size'],
+                        thumb=thumb_name,
+                        supports_streaming=True,
+                        attributes=[DocumentAttributeVideo(
+                            duration=video_data['duration'],
+                            w=video_data['width'],
+                            h=video_data['height'],
+                            supports_streaming=True
+                        )]
+                    )
+                except:
+                    await event.reply(message='Failed to upload media to telegram!\n\n%s' % traceback.format_exc())
            utils.rm_glob('mvd_temp/%s.*' % code)
    except:
        utils.pex()
--- a/utils.py
+++ b/utils.py
@@ -36,7 +36,7 @@ def get_all_mods() -> list[str]:
 def get_md5(data: str) -> str:
    ''' Returns MD5 for data '''
    md5_hash = hashlib.md5()
-    md5_hash.update(str(data).encode('ascii'))
+    md5_hash.update(str(data).encode('ascii', errors='ignore'))
    return md5_hash.hexdigest()