Oncorporation
diff --git a/‎_user_history/Surn/audios/edm_my_vampiresuykb4r24_76a2456eab4e4155aea9c54a39a251ca.wav‎
5.62 MB b/‎_user_history/Surn/audios/edm_my_vampiresuykb4r24_76a2456eab4e4155aea9c54a39a251ca.wav‎
5.62 MB
diff --git a/‎_user_history/Surn/audios/edm_my_vampiresyp05x2i9_d1435bab236e4695a1e26d6bc1b6739f.wav‎
625 KB b/‎_user_history/Surn/audios/edm_my_vampiresyp05x2i9_d1435bab236e4695a1e26d6bc1b6739f.wav‎
625 KB
diff --git a/‎_user_history/Surn/history.jsonl‎
Lines changed: 1 addition & 0 deletions b/‎_user_history/Surn/history.jsonl‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎_user_history/Surn/images/4bdae45ae7ca48449ad5b6647eed48ea_tmpk20ym4na.png‎
976 KB b/‎_user_history/Surn/images/4bdae45ae7ca48449ad5b6647eed48ea_tmpk20ym4na.png‎
976 KB
diff --git a/‎_user_history/Surn/images/cc2ba0e74c534e03b6defabb4a127ab9_tmpi1svt7_3.png‎
976 KB b/‎_user_history/Surn/images/cc2ba0e74c534e03b6defabb4a127ab9_tmpi1svt7_3.png‎
976 KB
diff --git a/‎_user_history/Surn/videos/edm_my_vampires_74dff59ee9da4b30babfc049e1fdbbc3.mp4‎
184 KB b/‎_user_history/Surn/videos/edm_my_vampires_74dff59ee9da4b30babfc049e1fdbbc3.mp4‎
184 KB
diff --git a/‎_user_history/Surn/videos/edm_my_vampires_d4877dbfe00f4c27a528fca57a561859.mp4‎
1.17 MB b/‎_user_history/Surn/videos/edm_my_vampires_d4877dbfe00f4c27a528fca57a561859.mp4‎
1.17 MB
diff --git a/‎app.py‎
Lines changed: 35 additions & 16 deletions b/‎app.py‎
Lines changed: 35 additions & 16 deletions
diff --git a/‎audiocraft/__init__.py‎
Lines changed: 1 addition & 1 deletion b/‎audiocraft/__init__.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎audiocraft/models/musicgen.py‎
Lines changed: 22 additions & 17 deletions b/‎audiocraft/models/musicgen.py‎
Lines changed: 22 additions & 17 deletions
@@ -0,0 +1 @@
+{"image_path": "G:\\Projects\\audiocraft\\_user_history\\Surn\\images\\4bdae45ae7ca48449ad5b6647eed48ea_tmpk20ym4na.png", "video_path": "G:\\Projects\\audiocraft\\_user_history\\Surn\\videos\\edm_my_vampires_74dff59ee9da4b30babfc049e1fdbbc3.mp4", "audio_path": "G:\\Projects\\audiocraft\\_user_history\\Surn\\audios\\edm_my_vampiresyp05x2i9_d1435bab236e4695a1e26d6bc1b6739f.wav", "document_path": "None", "label": "4/4 120bpm 320kbps 48khz, a light and cheerly EDM track, with syncopated drums, aery pads, and strong emotions", "metadata": {"prompt": "4/4 120bpm 320kbps 48khz, a light and cheerly EDM track, with syncopated drums, aery pads, and strong emotions", "negative_prompt": "", "Seed": 131541594365878, "steps": 1, "width": "768px", "height": "512px", "Dimension": 2, "Top-k": 280, "Top-p": 1150, "Randomness": 0.7, "cfg": 8.5, "overlap": 1, "Melody Condition": "vampire-mid", "Sample Segment": -1, "Duration": 5, "Audio": "C:\\Users\\CHARLE~1\\AppData\\Local\\Temp\\edm_my_vampiresyp05x2i9.wav", "font": "./assets/arial.ttf", "font_color": "rgba(200, 5.000000000000011, 5.000000000000011, 1)", "harmony_only": "Yes", "background": "C:\\Users\\CHARLE~1\\AppData\\Local\\Temp\\tmpk20ym4na.png", "include_title": true, "include_settings": false, "profile": "<gradio.components.state.State object at 0x00000242E1327400>", "commit": "398335e4525555d286b390e6618b76302bd7c85b", "tag": "v0.0.1-98-g398335e", "version": "https://huggingface.co/spaces/Surn/UnlimitedMusicGen/commit/398335e4525555d286b390e6618b76302bd7c85b", "model_version": "1.2.Surn", "model_name": "facebook/musicgen-stereo-melody-large", "model_description": "2 channels, 32000 Hz", "melody_name": "vampire-mid", "melody_extension": ".mp3", "hostname": "https://huggingface.co/spaces/Surn/UnlimitedMusicGen", "python": "3.10.11 (tags/v3.10.11:7d4cc5a, Apr  5 2023, 00:38:17) [MSC v.1929 64 bit (AMD64)]", "torch": "2.6.0+cu124", "xformers": "0.0.29.post3", "gradio": "5.23.3", "huggingface_space": "", "CUDA": "CUDA is 
available. device: NVIDIA GeForce RTX 4090 version: 12.4", "datetime": "2025-04-05 02:37:32.476231"}}
@@ -17,6 +17,7 @@
 import time
 import typing as tp
 import warnings
+from tqdm import tqdm
 from audiocraft.models import MusicGen
 from audiocraft.data.audio import audio_write
 from audiocraft.data.audio_utils import apply_fade, apply_tafade, apply_splice_effect
@@ -48,6 +49,7 @@
 os.environ['USE_FLASH_ATTENTION'] = '1'
 os.environ['XFORMERS_FORCE_DISABLE_TRITON']= '1'
 
+
 def interrupt_callback():
     return INTERRUPTED
 
@@ -162,7 +164,7 @@ def load_melody_filepath(melody_filepath, title, assigned_model):
 
     return  gr.update(value=melody_name), gr.update(maximum=MAX_PROMPT_INDEX, value=0), gr.update(value=assigned_model, interactive=True)
 
-def predict(model, text, melody_filepath, duration, dimension, topk, topp, temperature, cfg_coef, background, title, settings_font, settings_font_color, seed, overlap=1, prompt_index = 0, include_title = True, include_settings = True, harmony_only = False):
+def predict(model, text, melody_filepath, duration, dimension, topk, topp, temperature, cfg_coef, background, title, settings_font, settings_font_color, seed, overlap=1, prompt_index = 0, include_title = True, include_settings = True, harmony_only = False, profile = gr.OAuthProfile, progress=gr.Progress(track_tqdm=True)):
     global MODEL, INTERRUPTED, INTERRUPTING, MOVE_TO_CPU
     output_segments = None
     melody_name = "Not Used"
@@ -228,14 +230,16 @@ def predict(model, text, melody_filepath, duration, dimension, topk, topp, tempe
             cfg_coef=cfg_coef,
             duration=segment_duration,
             two_step_cfg=False,
+            extend_stride=10,
             rep_penalty=0.5
         )
+        MODEL.set_custom_progress_callback(gr.Progress(track_tqdm=True))
 
         try:
             if melody:
                 # return excess duration, load next model and continue in loop structure building up output_segments
                 if duration > MODEL.lm.cfg.dataset.segment_duration:
-                    output_segments, duration = generate_music_segments(text, melody, seed, MODEL, duration, overlap, MODEL.lm.cfg.dataset.segment_duration, prompt_index, harmony_only=False)
+                    output_segments, duration = generate_music_segments(text, melody, seed, MODEL, duration, overlap, MODEL.lm.cfg.dataset.segment_duration, prompt_index, harmony_only=False, progress=gr.Progress(track_tqdm=True))
                 else:
                     # pure original code
                     sr, melody = melody[0], torch.from_numpy(melody[1]).to(MODEL.device).float().t().unsqueeze(0)
@@ -247,20 +251,20 @@ def predict(model, text, melody_filepath, duration, dimension, topk, topp, tempe
                         descriptions=[text],
                         melody_wavs=melody,
                         melody_sample_rate=sr,
-                        progress=True
+                        progress=True, progress_callback=gr.Progress(track_tqdm=True)
                     )
                 # All output_segments are populated, so we can break the loop or set duration to 0
                 break
             else:
                 #output = MODEL.generate(descriptions=[text], progress=False)
                 if not output_segments:
-                    next_segment = MODEL.generate(descriptions=[text], progress=True)
+                    next_segment = MODEL.generate(descriptions=[text], progress=True, progress_callback=gr.Progress(track_tqdm=True))
                     duration -= segment_duration
                 else:
                     last_chunk = output_segments[-1][:, :, -overlap*MODEL.sample_rate:]
-                    next_segment = MODEL.generate_continuation(last_chunk, MODEL.sample_rate, descriptions=[text], progress=True)
+                    next_segment = MODEL.generate_continuation(last_chunk, MODEL.sample_rate, descriptions=[text], progress=True, progress_callback=gr.Progress(track_tqdm=True))
                     duration -= segment_duration - overlap
-                if next_segment != None:                
+                if next_segment != None:
                     output_segments.append(next_segment)
         except Exception as e:
             print(f"Error generating audio: {e}")
@@ -312,7 +316,7 @@ def predict(model, text, melody_filepath, duration, dimension, topk, topp, tempe
             return None, None, seed
         else:
             output = output.detach().cpu().float()[0]
-    profile: gr.OAuthProfile | None = None
+
     title_file_name = convert_title_to_filename(title)
     with NamedTemporaryFile("wb", suffix=".wav", delete=False, prefix = title_file_name) as file:
         video_description = f"{text}\n Duration: {str(initial_duration)} Dimension: {dimension}\n Top-k:{topk} Top-p:{topp}\n Randomness:{temperature}\n cfg:{cfg_coef} overlap: {overlap}\n Seed: {seed}\n Model: {model}\n Melody Condition:{melody_name}\n Sample Segment: {prompt_index}"
@@ -357,7 +361,7 @@ def predict(model, text, melody_filepath, duration, dimension, topk, topp, tempe
                 "background": background,
                 "include_title": include_title,
                 "include_settings": include_settings,
-                "profile": profile,
+                "profile": "Satoshi Nakamoto" if profile.value is None else profile.value.username,
                 "commit": commit_hash(),
                 "tag": git_tag(),
                 "version": gr.__version__,
@@ -396,11 +400,11 @@ def predict(model, text, melody_filepath, duration, dimension, topk, topp, tempe
 
         if waveform_video_path:
             modules.user_history.save_file(
-            profile=profile,
+            profile=profile.value,
             image=background,
-            audio=file,
+            audio=file.name,
             video=waveform_video_path,
-            label=text,
+            label=title,
             metadata=metadata,
         )
 
@@ -413,9 +417,9 @@ def predict(model, text, melody_filepath, duration, dimension, topk, topp, tempe
     torch.cuda.ipc_collect()
     return waveform_video_path, file.name, seed
 
-gr.set_static_paths(paths=["fonts/","assets/"])
+gr.set_static_paths(paths=["fonts/","assets/", "images/"])
 def ui(**kwargs):
-    with gr.Blocks(title="UnlimitedMusicGen",css_paths="style_20250331.css", theme='Surn/beeuty') as interface:
+    with gr.Blocks(title="UnlimitedMusicGen",css_paths="style_20250331.css", theme='Surn/beeuty') as demo:
         with gr.Tab("UnlimitedMusicGen"):
             gr.Markdown(
                 """
@@ -482,12 +486,12 @@ def ui(**kwargs):
                     with gr.Column() as c:
                         output = gr.Video(label="Generated Music")
                         wave_file = gr.File(label=".wav file", elem_id="output_wavefile", interactive=True)
-                        seed_used = gr.Number(label='Seed used', value=-1, interactive=False)
+                        seed_used = gr.Number(label='Seed used', value=-1, interactive=False)                        
 
             radio.change(toggle_audio_src, radio, [melody_filepath], queue=False, show_progress=False)
             melody_filepath.change(load_melody_filepath, inputs=[melody_filepath, title, model], outputs=[title, prompt_index , model], api_name="melody_filepath_change", queue=False)
             reuse_seed.click(fn=lambda x: x, inputs=[seed_used], outputs=[seed], queue=False, api_name="reuse_seed")
-            submit.click(predict, inputs=[model, text,melody_filepath, duration, dimension, topk, topp, temperature, cfg_coef, background, title, settings_font, settings_font_color, seed, overlap, prompt_index, include_title, include_settings, harmony_only], outputs=[output, wave_file, seed_used], api_name="submit")
+            
             gr.Examples(
                 examples=[
                     [
@@ -524,9 +528,24 @@ def ui(**kwargs):
                 inputs=[text, melody_filepath, model, title],
                 outputs=[output]
             )
-            gr.HTML(value=versions_html(), visible=True, elem_id="versions")
+            
         with gr.Tab("User History") as history_tab:
             modules.user_history.render()
+        user_profile = gr.State(None)
+            
+        with gr.Row("Versions") as versions_row:
+            gr.HTML(value=versions_html(), visible=True, elem_id="versions")
+
+        submit.click(
+            modules.user_history.get_profile,
+            inputs=[],
+            outputs=[user_profile],
+            queue=True,
+            api_name="submit"
+         ).then(
+             predict,
+             inputs=[model, text,melody_filepath, duration, dimension, topk, topp, temperature, cfg_coef, background, title, settings_font, settings_font_color, seed, overlap, prompt_index, include_title, include_settings, harmony_only, user_profile],
+             outputs=[output, wave_file, seed_used])
 
         # Show the interface
         launch_kwargs = {}
 
@@ -7,4 +7,4 @@
 # flake8: noqa
 from . import data, modules, models
 
-__version__ = '1.4.Surn'
+__version__ = '1.2.Surn'
@@ -15,6 +15,7 @@
 
 import omegaconf
 import torch
+import gradio as gr
 
 from .encodec import CompressionModel
 from .lm import LMModel
@@ -67,7 +68,7 @@ def __init__(self, name: str, compression_model: CompressionModel, lm: LMModel,
         self.device = next(iter(lm.parameters())).device
         self.generation_params: dict = {}
         self.set_generation_params(duration=self.duration)  # 15 seconds by default
-        self._progress_callback: tp.Optional[tp.Callable[[int, int], None]] = None
+        self._progress_callback: tp.Union[tp.Callable[[int, int], None], gr.Progress] = None
         if self.device.type == 'cpu':
             self.autocast = TorchAutocast(enabled=False)
         else:
@@ -142,7 +143,7 @@ def get_pretrained(name: str = 'melody-large', device=None):
     def set_generation_params(self, use_sampling: bool = True, top_k: int = 250,
                               top_p: float = 0.0, temperature: float = 1.0,
                               duration: float = 30.0, cfg_coef: float = 3.0,
-                              two_step_cfg: bool = False, extend_stride: float = 18, rep_penalty: float = None):
+                              two_step_cfg: bool = False, extend_stride: float = 10, rep_penalty: float = None):
         """Set the generation parameters for MusicGen.
 
         Args:
@@ -173,12 +174,12 @@ def set_generation_params(self, use_sampling: bool = True, top_k: int = 250,
             'two_step_cfg': two_step_cfg,
         }
 
-    def set_custom_progress_callback(self, progress_callback: tp.Optional[tp.Callable[[int, int], None]] = None):
+    def set_custom_progress_callback(self, progress_callback: tp.Union[tp.Callable[[int, int], None],gr.Progress] = None):
         """Override the default progress callback."""
         self._progress_callback = progress_callback
 
     def generate_unconditional(self, num_samples: int, progress: bool = False,
-                               return_tokens: bool = False) -> tp.Union[torch.Tensor,
+                               return_tokens: bool = False, progress_callback: gr.Progress = None) -> tp.Union[torch.Tensor, 
                                                                         tp.Tuple[torch.Tensor, torch.Tensor]]:
         """Generate samples in an unconditional manner.
 
@@ -194,7 +195,7 @@ def generate_unconditional(self, num_samples: int, progress: bool = False,
             return self.generate_audio(tokens), tokens
         return self.generate_audio(tokens)
 
-    def generate(self, descriptions: tp.List[str], progress: bool = False, return_tokens: bool = False) \
+    def generate(self, descriptions: tp.List[str], progress: bool = False, return_tokens: bool = False, progress_callback: gr.Progress = None) \
             -> tp.Union[torch.Tensor, tp.Tuple[torch.Tensor, torch.Tensor]]:
         """Generate samples conditioned on text.
 
@@ -212,7 +213,7 @@ def generate(self, descriptions: tp.List[str], progress: bool = False, return_to
 
     def generate_with_chroma(self, descriptions: tp.List[str], melody_wavs: MelodyType,
                              melody_sample_rate: int, progress: bool = False,
-                             return_tokens: bool = False) -> tp.Union[torch.Tensor,
+                             return_tokens: bool = False, progress_callback=gr.Progress(track_tqdm=True)) -> tp.Union[torch.Tensor,
                                                                       tp.Tuple[torch.Tensor, torch.Tensor]]:
         """Generate samples conditioned on text and melody.
 
@@ -250,7 +251,7 @@ def generate_with_chroma(self, descriptions: tp.List[str], melody_wavs: MelodyTy
         return self.generate_audio(tokens)
 
     def generate_with_all(self, descriptions: tp.List[str], melody_wavs: MelodyType,
-                             sample_rate: int, progress: bool = False, prompt: tp.Optional[torch.Tensor] = None, return_tokens: bool = False) \
+                             sample_rate: int, progress: bool = False, prompt: tp.Optional[torch.Tensor] = None, return_tokens: bool = False, progress_callback: gr.Progress = None) \
             -> tp.Union[torch.Tensor, tp.Tuple[torch.Tensor, torch.Tensor]]:
         """Generate samples conditioned on text and melody and audio prompts.
         Args:
@@ -307,7 +308,7 @@ def generate_with_all(self, descriptions: tp.List[str], melody_wavs: MelodyType,
 
     def generate_continuation(self, prompt: torch.Tensor, prompt_sample_rate: int,
                               descriptions: tp.Optional[tp.List[tp.Optional[str]]] = None,
-                              progress: bool = False, return_tokens: bool = False) \
+                              progress: bool = False, return_tokens: bool = False, progress_callback: gr.Progress = None) \
             -> tp.Union[torch.Tensor, tp.Tuple[torch.Tensor, torch.Tensor]]:
         """Generate samples conditioned on audio prompts.
 
@@ -317,7 +318,8 @@ def generate_continuation(self, prompt: torch.Tensor, prompt_sample_rate: int,
             prompt_sample_rate (int): Sampling rate of the given audio waveforms.
             descriptions (list of str, optional): A list of strings used as text conditioning. Defaults to None.
             progress (bool, optional): Flag to display progress of the generation process. Defaults to False.
-            return_tokens (bool, optional): If True, also return the generated tokens. Defaults to False.
+            return_tokens (bool, optional): If True, also return the generated tokens. Defaults to False.
+                Note: this is truly a hack and does not follow the progression of conditioning melody or previously generated audio.
         """
         if prompt.dim() == 2:
             prompt = prompt[None]
@@ -338,7 +340,8 @@ def _prepare_tokens_and_attributes(
             self,
             descriptions: tp.Sequence[tp.Optional[str]],
             prompt: tp.Optional[torch.Tensor],
-            melody_wavs: tp.Optional[MelodyList] = None,
+            melody_wavs: tp.Optional[MelodyList] = None, 
+            progress_callback: tp.Optional[gr.Progress] = None
     ) -> tp.Tuple[tp.List[ConditioningAttributes], tp.Optional[torch.Tensor]]:
         """Prepare model inputs.
 
@@ -392,7 +395,7 @@ def _prepare_tokens_and_attributes(
         return attributes, prompt_tokens
 
     def _generate_tokens(self, attributes: tp.List[ConditioningAttributes],
-                         prompt_tokens: tp.Optional[torch.Tensor], progress: bool = False) -> torch.Tensor:
+                         prompt_tokens: tp.Optional[torch.Tensor], progress: bool = False, progress_callback: gr.Progress = None) -> torch.Tensor:
         """Generate discrete audio tokens given audio prompt and/or conditions.
 
         Args:
@@ -411,17 +414,19 @@ def _progress_callback(generated_tokens: int, tokens_to_generate: int):
             if self._progress_callback is not None:
                 # Note that total_gen_len might be quite wrong depending on the
                 # codebook pattern used, but with delay it is almost accurate.
-                self._progress_callback(generated_tokens, total_gen_len)
-            else:
+                self._progress_callback((generated_tokens / total_gen_len), f"Generated {generated_tokens}/{total_gen_len} tokens")
+            if progress_callback is not None:
+                # Update Gradio progress bar
+                progress_callback((generated_tokens / total_gen_len), f"Generated {generated_tokens}/{total_gen_len} tokens")
+            if progress:
                 print(f'{generated_tokens: 6d} / {total_gen_len: 6d}', end='\r')
 
         if prompt_tokens is not None:
             assert max_prompt_len >= prompt_tokens.shape[-1], \
                 "Prompt is longer than audio to generate"
 
-        callback = None
-        if progress:
-            callback = _progress_callback
+        # callback = None
+        callback = _progress_callback
 
         if self.duration <= self.max_duration:
             # generate by sampling from LM, simple case.
@@ -481,7 +486,7 @@ def _progress_callback(generated_tokens: int, tokens_to_generate: int):
 
         # generate audio
 
-    def generate_audio(self, gen_tokens: torch.Tensor):        
+    def generate_audio(self, gen_tokens: torch.Tensor):
         try:
             """Generate Audio from tokens"""
             assert gen_tokens.dim() == 3
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1 @@`
	`1`	+{"image_path": "G:\\Projects\\audiocraft\\_user_history\\Surn\\images\\4bdae45ae7ca48449ad5b6647eed48ea_tmpk20ym4na.png", "video_path": "G:\\Projects\\audiocraft\\_user_history\\Surn\\videos\\edm_my_vampires_74dff59ee9da4b30babfc049e1fdbbc3.mp4", "audio_path": "G:\\Projects\\audiocraft\\_user_history\\Surn\\audios\\edm_my_vampiresyp05x2i9_d1435bab236e4695a1e26d6bc1b6739f.wav", "document_path": "None", "label": "4/4 120bpm 320kbps 48khz, a light and cheerly EDM track, with syncopated drums, aery pads, and strong emotions", "metadata": {"prompt": "4/4 120bpm 320kbps 48khz, a light and cheerly EDM track, with syncopated drums, aery pads, and strong emotions", "negative_prompt": "", "Seed": 131541594365878, "steps": 1, "width": "768px", "height": "512px", "Dimension": 2, "Top-k": 280, "Top-p": 1150, "Randomness": 0.7, "cfg": 8.5, "overlap": 1, "Melody Condition": "vampire-mid", "Sample Segment": -1, "Duration": 5, "Audio": "C:\\Users\\CHARLE~1\\AppData\\Local\\Temp\\edm_my_vampiresyp05x2i9.wav", "font": "./assets/arial.ttf", "font_color": "rgba(200, 5.000000000000011, 5.000000000000011, 1)", "harmony_only": "Yes", "background": "C:\\Users\\CHARLE~1\\AppData\\Local\\Temp\\tmpk20ym4na.png", "include_title": true, "include_settings": false, "profile": "<gradio.components.state.State object at 0x00000242E1327400>", "commit": "398335e4525555d286b390e6618b76302bd7c85b", "tag": "v0.0.1-98-g398335e", "version": "https://huggingface.co/spaces/Surn/UnlimitedMusicGen/commit/398335e4525555d286b390e6618b76302bd7c85b", "model_version": "1.2.Surn", "model_name": "facebook/musicgen-stereo-melody-large", "model_description": "2 channels, 32000 Hz", "melody_name": "vampire-mid", "melody_extension": ".mp3", "hostname": "https://huggingface.co/spaces/Surn/UnlimitedMusicGen", "python": "3.10.11 (tags/v3.10.11:7d4cc5a, Apr 5 2023, 00:38:17) [MSC v.1929 64 bit (AMD64)]", "torch": "2.6.0+cu124", "xformers": "0.0.29.post3", "gradio": "5.23.3", "huggingface_space": "", "CUDA": "CUDA is 
available. device: NVIDIA GeForce RTX 4090 version: 12.4", "datetime": "2025-04-05 02:37:32.476231"}}