diff --git a/discord/abc.py b/discord/abc.py index 535c70abaad4..9b7d63e012d3 100644 --- a/discord/abc.py +++ b/discord/abc.py @@ -1671,12 +1671,21 @@ async def send( if view and not hasattr(view, '__discord_ui_view__'): raise TypeError(f'view parameter must be View not {view.__class__.__name__}') - if suppress_embeds or silent: + voice = False + if file is not None and file.voice: + if content is not None: + raise TypeError('Cannot send content with a voice message') + if embed is not None or embeds is not None: + raise TypeError('Cannot send embeds with a voice message') + voice = True + + if suppress_embeds or silent or voice: from .message import MessageFlags # circular import flags = MessageFlags._from_value(0) flags.suppress_embeds = suppress_embeds flags.suppress_notifications = silent + flags.voice = voice else: flags = MISSING diff --git a/discord/file.py b/discord/file.py index 8f9102216960..f91a2f4b593d 100644 --- a/discord/file.py +++ b/discord/file.py @@ -27,6 +27,10 @@ import os import io +import base64 +from .oggparse import OggStream +from .opus import Decoder +import struct from .utils import MISSING @@ -75,9 +79,47 @@ class File: The file description to display, currently only supported for images. .. versionadded:: 2.0 + + voice: :class:`bool` + Whether the file is a voice message. If left unspecified, the :attr:`~File.duration` is used + to determine if the file is a voice message. + + .. note:: + + Voice files must be an audio only format. + + A *non-exhaustive* list of supported formats are: `ogg`, `mp3`, `wav`, `aac`, and `flac`. + + .. versionadded:: 2.7 + + duration: Optional[:class:`float`] + The duration of the voice message in seconds + + .. versionadded:: 2.7 + + waveform: Optional[List[:class:`int`]] + The waveform data of the voice message + + .. note:: + If a waveform is not given, it will be generated for files where voice is ``True`` + + Accurate generation is only provided for the Opus format. Other formats will be provided with a random waveform + + .. versionadded:: 2.7 """ - __slots__ = ('fp', '_filename', 'spoiler', 'description', '_original_pos', '_owner', '_closer') + __slots__ = ( + 'fp', + '_filename', + 'spoiler', + 'description', + '_original_pos', + '_owner', + '_closer', + 'duration', + 'waveform', + 'voice', + ) def __init__( self, @@ -86,6 +128,9 @@ def __init__( *, spoiler: bool = MISSING, description: Optional[str] = None, + voice: bool = MISSING, + duration: Optional[float] = None, + waveform: Optional[list[int]] = None, ): if isinstance(fp, io.IOBase): if not (fp.seekable() and fp.readable()): @@ -117,6 +162,30 @@ def __init__( self.spoiler: bool = spoiler self.description: Optional[str] = description + self.duration = duration + if waveform is not None: + if len(waveform) > 256: + raise ValueError('Waveforms have a maximum of 256 values') + elif max(waveform) > 255: + raise ValueError('Maximum value of ints is 255 for waveforms') + elif min(waveform) < 0: + raise ValueError('Minimum value of ints is 0 for waveforms') + + if voice is MISSING: + voice = duration is not None + self.voice = voice + + if duration is None and voice: + raise TypeError('Voice messages must have a duration') + + if waveform is None and voice: + try: + self.waveform = self.generate_waveform() + except Exception: + self.waveform = list(os.urandom(256)) + self.reset() + else: + self.waveform = waveform @property def filename(self) -> str: @@ -170,4 +239,62 @@ def to_dict(self, index: int) -> Dict[str, Any]: if self.description is not None: payload['description'] = self.description + if self.voice: + payload['duration_secs'] = self.duration + payload['waveform'] = base64.b64encode(bytes(self.waveform)).decode('utf-8') # type: ignore + return payload + + def generate_waveform(self) -> list[int]: + if not self.voice: + raise ValueError('Cannot produce waveform for non voice file') + self.reset() + ogg = OggStream(self.fp) # type: ignore + decoder = Decoder() + waveform: list[int] = [] + prefixes = [b'OpusHead', b'OpusTags'] + for packet in ogg.iter_packets(): + if packet[:8] in prefixes: + continue + + if b'vorbis' in packet: + raise ValueError("File format is 'vorbis'. Format of 'opus' is required for waveform generation") + + # these are PCM bytes in 16-bit signed little-endian form + decoded = decoder.decode(packet, fec=False) + + # 16 bits -> 2 bytes per sample + num_samples = len(decoded) // 2 + + # https://docs.python.org/3/library/struct.html#byte-order-size-and-alignment + format = '<' + 'h' * num_samples + samples: tuple[int] = struct.unpack(format, decoded) + + waveform.extend(samples) + + # Make sure all values are positive + for i in range(len(waveform)): + if waveform[i] < 0: + waveform[i] = -waveform[i] + + point_count: int = self.duration * 10 # type: ignore + point_count = min(point_count, 255) + points_per_sample: int = len(waveform) // point_count + sample_waveform: list[int] = [] + + total, count = 0, 0 + # Average out the amplitudes for each point within a sample + for i in range(len(waveform)): + total += waveform[i] + count += 1 + if i % points_per_sample == 0: + sample_waveform.append(total // count) + total, count = 0, 0 + + # Maximum value of a waveform is 0xff (255) + highest = max(sample_waveform) + mult = 255 / highest + for i in range(len(sample_waveform)): + sample_waveform[i] = int(sample_waveform[i] * mult) + + return sample_waveform