Added audio downloading, switched to yt-dlp

- enabling and disabling mods using config.json works now
- switched to yt-dlp (it supports YouTube without problems)
- added audio downloading
- changed 'mvd' mod tag to 'md'
- added filesize in qualities list
- changed thumbnail size to 280x280 at most
- .webm and .mhtml extensions are ignored now
- fixed encoding errors in utils.get_md5 (they are ignored now, so it is not an actual fix, but it 100% won't break anything)
This commit is contained in:
Nikita Tyukalov, ASUS, Linux
2025-11-24 00:31:03 +03:00
parent fce7968a72
commit 0bcd47ab4c
3 changed files with 218 additions and 61 deletions

View File

@@ -94,6 +94,11 @@ async def _cb_new_message(event) -> None:
tasks = []
# pass the event to all mods interested in it
for mod_name in _mods:
# mod is not enabled for this user?
if mod_name not in sessions[name]['config']['accounts'][name]:
continue
if not sessions[name]['config']['accounts'][name][mod_name]:
continue
# mod
mod = _mods[mod_name]
try:

View File

@@ -12,6 +12,7 @@ import os
from telethon import events
from telethon.tl.types import PeerUser
from telethon.tl.types import DocumentAttributeVideo
from telethon.tl.types import DocumentAttributeAudio
import utils
@@ -32,7 +33,7 @@ def _get_all_qualities_raw(url: str, proxy: bool, timeout: float = 20) -> dict |
try:
# prepare arguments
args = [
utils.which('youtube-dl'),
utils.which('yt-dlp'),
'-J',
url
]
@@ -50,22 +51,44 @@ def _get_all_qualities_raw(url: str, proxy: bool, timeout: float = 20) -> dict |
j = json.loads(txt)
except:
return cp.stderr.decode(encoding='ascii')
if not j:
return 'yt-dlp has failed to get the list of qualities'
# dict to return
res = {}
title = j['title'] if 'title' in j else 'Без названия'
author = j['channel'] if 'channel' in j else 'Неизвестен'
thumbnail = j['thumbnail'] if 'thumbnail' in j else None
# one format only - convert to many
if 'formats' not in j:
j['formats'] = [{
'format': j['format'],
'format_id': j['format_id'],
'ext': j['ext']
'ext': j['ext'],
'video_ext': j['video_ext'],
'audio_ext': j['audio_ext']
}]
if 'filesize' in j:
j['formats'][0]['filesize'] = j['filesize']
# check all formats
for f in j['formats']:
obj = {
'title': title,
'author': author,
'url': url,
'format': f['format_id'],
'thumbnail': thumbnail,
'ext': f['ext'],
'format_name': f['format'],
'is_audio': f['audio_ext'] != 'none',
'_added': time.time()
}
# filesize exists
try:
if 'filesize' in f:
obj['filesize'] = int(f['filesize'])
except:
pass
cache_id = utils.get_md5('\n'.join([str(obj[i]) for i in obj if i[0] != '_']))
_cached_qualities[cache_id] = obj
res[cache_id] = obj
@@ -93,7 +116,7 @@ def _download_video_raw(url: str, quality_code : str, path: str, proxy: bool, ti
# prepare arguments
args = [
utils.which('youtube-dl'),
utils.which('yt-dlp'),
'--newline',
'-f',
data['format'],
@@ -159,7 +182,7 @@ async def _download_video(url: str, quality_code : str, path: str, proxy: bool,
except:
return traceback.format_exc()
def _get_video_data_raw(path: str) -> dict | None:
def _get_video_data_raw(path: str, audio: bool) -> dict | None:
''' Get video duration, width, height and file size '''
try:
# prepare arguments
@@ -168,9 +191,9 @@ def _get_video_data_raw(path: str) -> dict | None:
'-v',
'error',
'-select_streams',
'v:0',
'a:0' if audio else 'v:0',
'-show_entries',
'stream=width,height,duration',
'stream=duration' if audio else 'stream=width,height,duration',
'-show_entries',
'format=size',
'-of',
@@ -188,24 +211,32 @@ def _get_video_data_raw(path: str) -> dict | None:
try:
txt = cp.stdout.decode(encoding='ascii').strip()
except:
return None
txt = cp.stderr.decode(encoding='ascii').strip()
return txt
parts = [i.strip() for i in txt.split('\n') if i.strip()]
# result
return {
'width': int(parts[0]),
'height': int(parts[1]),
'duration': int(float(parts[2])),
'size': int(parts[3])
}
# result for audio
if audio:
return {
'duration': int(float(parts[0])),
'size': int(parts[1])
}
# result for video
else:
return {
'width': int(parts[0]),
'height': int(parts[1]),
'duration': int(float(parts[2])),
'size': int(parts[3])
}
except subprocess.TimeoutExpired:
return 'ffprobe timed out'
except:
return traceback.format_exc()
async def _get_video_data(path: str) -> dict | None:
async def _get_video_data(path: str, audio: bool) -> dict | None:
''' Async version '''
try:
return await asyncio.to_thread(_get_video_data_raw, path)
return await asyncio.to_thread(_get_video_data_raw, path, audio)
except:
return traceback.format_exc()
@@ -220,7 +251,7 @@ def _generate_thumb_raw(video: str, timestamp: int, thumb: str) -> bool:
'-i',
video,
'-vf',
'thumbnail,scale=\'min(320,iw)\':\'min(320,ih)\':force_original_aspect_ratio=decrease',
'thumbnail,scale=\'min(280,iw)\':\'min(280,ih)\':force_original_aspect_ratio=decrease',
'-frames:v',
'1',
thumb
@@ -254,6 +285,50 @@ async def _generate_thumb(video: str, timestamp: int, thumb: str) -> dict | None
except:
return traceback.format_exc()
def _download_thumb_for_tg_raw(url: str, proxy: bool, thumb: str) -> str | None:
    ''' Downloads a thumbnail for Telegram

    Runs ffmpeg with `url` as the input, scales the picture so that it is
    280x280 at most (aspect ratio is kept) and writes the result to `thumb`.
    If `proxy` is set, ffmpeg is started through proxychains4.

    Returns None on success or a string with the error description.
    '''
    try:
        # prepare arguments
        args = [
            utils.which('ffmpeg'),
            '-i',
            url,
            '-vf',
            'thumbnail,scale=\'min(280,iw)\':\'min(280,ih)\':force_original_aspect_ratio=decrease',
            thumb
        ]
        if proxy:
            args = [utils.which('proxychains4')] + args
        # start the process
        cp = subprocess.run(
            args,
            capture_output=True,
            timeout=10
        )
        # the output file is the only thing that matters: if it exists, it is a success
        if os.path.isfile(thumb):
            return None
        # no file - report it to the caller together with ffmpeg's stderr;
        # undecodable bytes are replaced, so decoding itself can not fail
        details = cp.stderr.decode(encoding='utf-8', errors='replace').strip()
        return 'ffmpeg has failed to create the thumbnail (exit code %s)\n\n%s' % (cp.returncode, details)
    except subprocess.TimeoutExpired:
        return 'ffmpeg timed out'
    except Exception:
        return traceback.format_exc()
async def _download_thumb_for_tg(url: str, proxy: bool, thumb: str) -> str | None:
    ''' Async version: runs _download_thumb_for_tg_raw in a separate thread

    Returns whatever the raw function returns, or a traceback string
    if running it has raised.
    '''
    try:
        outcome = await asyncio.to_thread(_download_thumb_for_tg_raw, url, proxy, thumb)
    except:
        # any failure is reported as text, same as the raw version does
        return traceback.format_exc()
    return outcome
async def mod_init(config: dict) -> bool:
''' Initialize the mod '''
@@ -284,7 +359,7 @@ def mod_get_mighty() -> bool:
def mod_get_tags() -> None:
''' Get tags used by the mod '''
return ['mvd', 'mvdl', 'mvdlp', 'mvdd', 'mvddp']
return ['md', 'mvdl', 'mvdlp', 'madl', 'madlp', 'mdd', 'mddp']
async def mod_new_message(session, event) -> None:
''' Handle new message '''
@@ -292,8 +367,8 @@ async def mod_new_message(session, event) -> None:
# get the message
msg = event.message
# not outgoing - do not process
if not msg.out:
return
#if not msg.out:
# return
# peer must be user
peer = msg.peer_id
if type(peer) is not PeerUser:
@@ -307,13 +382,14 @@ async def mod_new_message(session, event) -> None:
args = args[1:]
await asyncio.sleep(0.5)
# help
if cmd == 'mvd':
response_text = 'mod_video_downloader:'
if cmd == 'md':
response_text = 'mod_audio_video_downloader:'
response_text += '\n- mvdl[p] [URL] - get list of all video qualities'
response_text += '\n- mvdd[p] [CODE] - download video'
response_text += '\n- madl[p] [URL] - get list of all audio qualities'
response_text += '\n- mdd[p] [CODE] [TITLE] [§ PERFORMER] - download video or audio (for audio only: track title)'
response_text += '\n\nUse \'p\' letter to utilize proxy'
await event.reply(message=response_text)
# list qualities
# list video qualities
elif cmd.startswith('mvdl'):
if not args:
await event.reply(message='No URL!')
@@ -328,28 +404,63 @@ async def mod_new_message(session, event) -> None:
result = 'Qualities:'
for qid in qualities:
data = qualities[qid]
result += '\n\n<code>mvdd%s %s</code>' % ('p' if cmd[-1] == 'p' else '', qid)
result += '\n- Format: %s' % data['format']
# not a video
if data['is_audio']:
continue
# extensions to ignore
if data['ext'] in ['webm', 'mhtml']:
continue
result += '\n\n<code>mdd%s %s</code>' % ('p' if cmd[-1] == 'p' else '', qid)
result += '\n- Format name: %s' % data['format_name']
result += '\n- Extension: %s' % data['ext']
if 'filesize' in data:
result += '\n- Filesize: %s KB' % (data['filesize'] / 1000)
await event.reply(message=result, parse_mode='HTML')
# list audio qualities
elif cmd.startswith('madl'):
if not args:
await event.reply(message='No URL!')
return
await event.reply(message='Checking URL... Please wait, you\'ll be notified if an error happens!')
qualities = await _get_all_qualities(' '.join(args), cmd[-1] == 'p')
# error
if type(qualities) is str:
await event.reply(message='Error:\n\n%s' % qualities)
return
# success
result = 'Qualities:'
for qid in qualities:
data = qualities[qid]
# not an audio
if not data['is_audio']:
continue
# extensions to ignore
if data['ext'] in ['webm']:
continue
result += '\n\n<code>mdd%s %s</code>' % ('p' if cmd[-1] == 'p' else '', qid)
result += '\n- Format name: %s' % data['format_name']
result += '\n- Extension: %s' % data['ext']
if 'filesize' in data:
result += '\n- Filesize: %s KB' % (data['filesize'] / 1000)
await event.reply(message=result, parse_mode='HTML')
# download
elif cmd.startswith('mvdd'):
elif cmd.startswith('mdd'):
if not args:
await event.reply(message='No CODE!')
return
# get the code and check it
code = args[-1]
code = args[0]
if code not in _cached_qualities:
await event.reply(message='This code does not exist. Use \'mvdl[p]\' to obtain the code.')
return
# get video data
data = _cached_qualities[code]
await event.reply(message='Downloading the video... Please wait, you\'ll be notified if an error happens!')
await event.reply(message='Downloading the media... Please wait, you\'ll be notified if an error happens!')
res = await _download_video(data['url'], code, 'mvd_temp/%s.bin' % code, cmd[-1] == 'p')
# res is str - error
if type(res) is str:
utils.rm_glob('mvd_temp/%s.*' % code)
await event.reply(message='Failed to download video: %s' % res)
await event.reply(message='Failed to download media: %s' % res)
return
# res is false
if not res:
@@ -365,41 +476,82 @@ async def mod_new_message(session, event) -> None:
pass
except:
utils.rm_glob('mvd_temp/%s.*' % code)
await event.reply(message='Failed to rename downloaded video')
await event.reply(message='Failed to rename downloaded media')
return
# get video data
video_data = await _get_video_data(new_name)
video_data = await _get_video_data(new_name, data['is_audio'])
if type(video_data) is not dict:
utils.rm_glob('mvd_temp/%s.*' % code)
await event.reply(message='Failed to use \'ffprobe\' to get video data')
await event.reply(message='Failed to use \'ffprobe\' to get media data:\n\n%s' % video_data)
return
# generate the thumbnail
thumb_name = 'mvd_temp/%s.jpg' % code
if not await _generate_thumb(new_name, int(video_data['duration'] * 0.75), thumb_name):
utils.rm_glob('mvd_temp/%s.*' % code)
await event.reply(message='Failed to generate video thumbnail')
return
# log
await event.reply(message='Video is downloaded, thumbnail is generated, uploading it to Telegram...')
# send file
try:
await event.client.send_file(
entity=peer,
file=new_name,
caption='%s' % data['url'],
mime_type=utils.get_mime(data['ext']),
file_size=video_data['size'],
thumb=thumb_name,
supports_streaming=True,
attributes=[DocumentAttributeVideo(
duration=video_data['duration'],
w=video_data['width'],
h=video_data['height'],
supports_streaming=True
)]
)
except:
await event.reply(message='Failed to upload video to telegram!')
# audio
if data['is_audio']:
# assign track title
remains = [i.strip() for i in ' '.join(args[1:]).split('§') if i.strip()]
title = data['title']
author = data['author']
thumbnail = data['thumbnail']
# use title and author from user message
if remains:
title = remains[0]
if len(remains) > 1:
author = remains[-1]
# thumbnail exists, download it and change scale
if thumbnail:
thumb_path = 'mvd_temp/%s.jpg' % code
thumbnail = await _download_thumb_for_tg(thumbnail, cmd[-1] == 'p', thumb_path)
if thumbnail:
await event.reply(message='WARNING (audio is still being processed - THIS IS NOT AN ERROR). Failed to download thumbnail.\n\n%s' % thumbnail)
thumbnail = None
else:
thumbnail = thumb_path
# send file
await event.reply(message='Audio is downloaded, uploading it to Telegram...')
try:
await event.client.send_file(
entity=peer,
file=new_name,
caption='%s' % data['url'],
mime_type=utils.get_mime(data['ext']),
file_size=video_data['size'],
thumb=thumbnail,
attributes=[DocumentAttributeAudio(
duration=video_data['duration'],
title=title,
performer=author
)]
)
except:
await event.reply(message='Failed to upload audio to telegram!\n\n%s' % traceback.format_exc())
# video
else:
# generate the thumbnail
thumb_name = 'mvd_temp/%s.jpg' % code
if not await _generate_thumb(new_name, int(video_data['duration'] * 0.75), thumb_name):
utils.rm_glob('mvd_temp/%s.*' % code)
await event.reply(message='Failed to generate media thumbnail')
return
# log
await event.reply(message='Media is downloaded, thumbnail is generated, uploading it to Telegram...')
# send file
try:
await event.client.send_file(
entity=peer,
file=new_name,
caption='%s' % data['url'],
mime_type=utils.get_mime(data['ext']),
file_size=video_data['size'],
thumb=thumb_name,
supports_streaming=True,
attributes=[DocumentAttributeVideo(
duration=video_data['duration'],
w=video_data['width'],
h=video_data['height'],
supports_streaming=True
)]
)
except:
await event.reply(message='Failed to upload media to telegram!\n\n%s' % traceback.format_exc())
utils.rm_glob('mvd_temp/%s.*' % code)
except:
utils.pex()

View File

@@ -36,7 +36,7 @@ def get_all_mods() -> list[str]:
def get_md5(data: str) -> str:
''' Returns MD5 for data '''
md5_hash = hashlib.md5()
md5_hash.update(str(data).encode('ascii'))
md5_hash.update(str(data).encode('ascii', errors='ignore'))
return md5_hash.hexdigest()