
Commit e036719

Merge pull request #7 from leopiney/feat/upgrade-ag2-and-swarms
Upgrade ag2 and use swarms agents approach
2 parents 276c3b3 + 600c254 commit e036719

23 files changed (+3672 / -3568 lines)

.gitignore

Lines changed: 3 additions & 1 deletion
@@ -110,5 +110,7 @@ Thumbs.db
 .cursorrules

 # Project-specific
+chroma/
 data/
-output/
+output/
+parsed_docs/

config/config_hume.json

Lines changed: 25 additions & 0 deletions
@@ -0,0 +1,25 @@
+{
+  "show": {
+    "name": "The NeuralNoise Podcast",
+    "about": "A podcast about the future of AI and the impact it will have on our lives. We discuss interesting topics in AI and technology.",
+    "language": "English"
+  },
+  "speakers": {
+    "speaker1": {
+      "name": "Zach",
+      "about": "Zach is a software engineer and the founder of NeuralNoise.",
+      "settings": {
+        "provider": "hume",
+        "voice_id": "dfcdd7c7-8255-4216-9ca7-4d4d7b991fc7"
+      }
+    },
+    "speaker2": {
+      "name": "Emily",
+      "about": "Emily is the co-host of the NeuralNoise podcast, she is energetic, and a product designer.",
+      "settings": {
+        "provider": "hume",
+        "voice_id": "3e90dd7e-de46-448a-b143-10f521f84ac9"
+      }
+    }
+  }
+}
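The new config only adds two Hume voices on top of the existing show/speakers structure. As a quick sanity check, a minimal sketch (assuming the standard json module and the SpeakerSettings model from src/neuralnoise/models.py shown further down in this diff) could validate the speaker blocks before generating an episode:

# Minimal sketch: validate the Hume speaker settings from config_hume.json.
# Assumes SpeakerSettings from src/neuralnoise/models.py (voice_id, provider);
# fields beyond provider and voice_id are not checked here.
import json

from neuralnoise.models import SpeakerSettings

with open("config/config_hume.json", "r", encoding="utf-8") as f:
    config = json.load(f)

for key, speaker in config["speakers"].items():
    settings = SpeakerSettings(**speaker["settings"])
    print(f"{key}: provider={settings.provider}, voice_id={settings.voice_id}")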

poetry.lock

Lines changed: 0 additions & 2455 deletions
This file was deleted.

pyproject.toml

Lines changed: 5 additions & 4 deletions
@@ -1,6 +1,6 @@
 [project]
 name = "neuralnoise"
-version = "1.5.0"
+version = "2.0.1"
 description = "An AI-powered podcast studio that uses multiple AI agents working together."
 authors = [
     { name = "Leonardo Piñeyro", email = "[email protected]" }
@@ -10,7 +10,7 @@ readme = "README.md"
 license = { file = "LICENSE" }

 keywords = [
-    "ai", "podcast", "notebooklm", "autogen", "ai audio generation", "generative ai"
+    "ai", "podcast", "notebooklm", "ag2", "autogen", "ai audio generation", "generative ai"
 ]
 classifiers = [
     "Development Status :: 4 - Beta",
@@ -31,11 +31,12 @@ classifiers = [

 requires-python = ">=3.10,<3.12"
 dependencies = [
-    "ag2>=0.5.2",
+    "ag2[rag]>=0.7.6",
     "backoff>=2.2.1",
     "beautifulsoup4>=4.12.3",
-    "crawl4ai>=0.3.3",
+    "crawl4ai==0.4.0",
     "elevenlabs>=1.10.0",
+    "hume[microphone]>=0.4.2",
     "langchain-community>=0.3.3",
     "lxml>=5.3.0",
     "openai>=1.52.2",

src/neuralnoise/cli.py

Lines changed: 2 additions & 2 deletions
@@ -45,7 +45,7 @@ def generate(
     output_dir = Path("output") / name
     output_dir.mkdir(parents=True, exist_ok=True)

-    content_path = output_dir / "content.txt"
+    content_path = output_dir / "content.md"

     if content_path.exists():
         with open(content_path, "r") as f:
@@ -61,7 +61,7 @@
     typer.secho(f"Extracting content from inputs {input}", fg=typer.colors.YELLOW)
     content = extract_content(input)

-    with open(output_dir / "content.txt", "w") as f:
+    with open(content_path, "w") as f:
         f.write(content)

     typer.secho(f"Generating podcast episode {name}", fg=typer.colors.GREEN)

src/neuralnoise/extract.py

Lines changed: 4 additions & 1 deletion
@@ -136,7 +136,10 @@ async def _extract_multiple_sources(
         *[_extract_single_source(source, use_async=use_async) for source in sources]
     )

-    return "\n\n".join(f"<document>\n{content}\n</document>" for content in contents)
+    return "\n\n".join(
+        f"# Document START: {source}\n\n{content}\n\n# Document END: {source}\n\n"
+        for source, content in zip(sources, contents)
+    )


 # Public API functions
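The extractor now labels each source with plain-text Document START/END markers that include the source identifier, rather than anonymous <document> tags, presumably so content can be attributed to a specific source downstream. A standalone sketch of the resulting joined format, using dummy data rather than the real extraction pipeline:

# Standalone sketch of the new joined-document format (dummy data only).
sources = ["https://example.com/article", "notes.pdf"]
contents = ["First document text.", "Second document text."]

combined = "\n\n".join(
    f"# Document START: {source}\n\n{content}\n\n# Document END: {source}\n\n"
    for source, content in zip(sources, contents)
)
print(combined)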

src/neuralnoise/models.py

Lines changed: 35 additions & 9 deletions
@@ -1,8 +1,8 @@
 from pathlib import Path
 from textwrap import dedent
-from typing import Literal
+from typing import Any, Literal

-from pydantic import BaseModel, Field, field_validator
+from pydantic import BaseModel, Field


 class VoiceSettings(BaseModel):
@@ -15,7 +15,7 @@ class VoiceSettings(BaseModel):
 class SpeakerSettings(BaseModel):
     voice_id: str

-    provider: Literal["elevenlabs", "openai"] = "elevenlabs"
+    provider: Literal["elevenlabs", "openai", "hume"] = "elevenlabs"
     voice_model: Literal["eleven_multilingual_v2", "tts-1", "tts-1-hd"] = (
         "eleven_multilingual_v2"
     )
@@ -97,14 +97,40 @@ class ScriptSegment(BaseModel):
         None, description="Time in seconds for silence after speaking"
     )

-    @field_validator("blank_duration")
-    def validate_blank_duration(cls, v):
-        if v is not None and v not in (0.1, 0.2, 0.5):
-            raise ValueError("blank_duration must be 0.1, 0.2, or 0.5 seconds")
-        return v
-

 class PodcastScript(BaseModel):
     section_id: int
     section_title: str
     segments: list[ScriptSegment]
+
+
+class SharedContext(BaseModel):
+    """Manages shared state for content processing and section management."""
+
+    content_analysis: dict[str, Any] | None = Field(
+        default=None, description="Analysis results of the processed content"
+    )
+    section_scripts: dict[int, dict[str, Any]] = Field(
+        default_factory=dict,
+        description="Mapping of section indices to their associated scripts",
+    )
+    section_feedbacks: dict[int, list[str]] = Field(
+        default_factory=dict,
+        description="Mapping of section indices to their associated feedback",
+    )
+    execution_plans: str = Field(
+        default="",
+        description="Execution plans for the complete podcast, specifying all required sections",
+    )
+    current_section_index: int = Field(
+        default=0, description="Index of the currently active section"
+    )
+    is_complete: bool = Field(
+        default=False, description="Flag indicating if processing is complete"
+    )
+    errors: list[str] = Field(
+        default_factory=list, description="List of errors encountered during processing"
+    )
+    warnings: list[str] = Field(
+        default_factory=list, description="List of warnings generated during processing"
+    )

src/neuralnoise/prompt_manager.py

Lines changed: 145 additions & 0 deletions
@@ -0,0 +1,145 @@
+from enum import Enum, auto
+from pathlib import Path
+from string import Template
+
+
+class PromptType(Enum):
+    """Enumeration of all available prompt types."""
+
+    CONTENT_ANALYZER = auto()
+    PLANNER = auto()
+    SCRIPT_GENERATOR = auto()
+    EDITOR = auto()
+    USER_PROXY = auto()
+    USER_MESSAGE = auto()
+    MANAGER = auto()
+
+
+class PromptManager:
+    """
+    Manages loading and caching of system prompts for the neuralnoise application.
+
+    This class provides a centralized way to load and access prompts from the prompts directory.
+    It loads all prompts during initialization and provides methods to access and substitute
+    variables in the prompts.
+    """
+
+    _instance = None
+
+    def __new__(cls, *args, **kwargs):
+        """Implement singleton pattern to ensure only one instance exists."""
+        if cls._instance is None:
+            cls._instance = super(PromptManager, cls).__new__(cls)
+        return cls._instance
+
+    def __init__(self, prompts_dir: Path | None = None, language: str = "en"):
+        """
+        Initialize the PromptManager with the prompts directory and language.
+
+        Args:
+            prompts_dir: Directory containing prompt files. If None, uses the default package prompts.
+            language: Language code for prompt templates.
+        """
+        # Skip initialization if already initialized
+        if hasattr(self, "_initialized") and self._initialized:
+            return
+
+        self.language = language
+
+        # Set prompts directory
+        if prompts_dir is None:
+            from neuralnoise.utils import package_root
+
+            self.prompts_dir = package_root / "prompts"
+        else:
+            self.prompts_dir = Path(prompts_dir)
+
+        # Map of PromptType to file paths
+        self.prompt_files = {
+            PromptType.CONTENT_ANALYZER: self.prompts_dir
+            / "content_analyzer.system.xml",
+            PromptType.PLANNER: self.prompts_dir / "planner.system.xml",
+            PromptType.SCRIPT_GENERATOR: self.prompts_dir
+            / "script_generation.system.xml",
+            PromptType.EDITOR: self.prompts_dir / "editor.system.xml",
+            PromptType.USER_PROXY: self.prompts_dir / "user_proxy.system.xml",
+            PromptType.USER_MESSAGE: self.prompts_dir / "user_proxy.message.xml",
+            PromptType.MANAGER: self.prompts_dir / "manager.system.xml",
+        }
+
+        # Load all prompts
+        self.prompts: dict[PromptType, str] = {}
+        self._load_all_prompts()
+
+        self._initialized = True
+
+    def _load_all_prompts(self) -> None:
+        """Load all prompts from the prompts directory."""
+        for prompt_type, file_path in self.prompt_files.items():
+            self.prompts[prompt_type] = self._load_prompt_file(file_path)
+
+    def _load_prompt_file(self, path: Path) -> str:
+        """
+        Load a prompt from a file.
+
+        Args:
+            path: Path to the prompt file.
+
+        Returns:
+            The content of the prompt file or an empty string if the file doesn't exist.
+        """
+        if not path.exists():
+            return ""
+
+        with open(path, "r", encoding="utf-8") as f:
+            content = f.read()
+
+        return content
+
+    def get_prompt(self, prompt_type: PromptType, **kwargs) -> str:
+        """
+        Get a prompt with variables substituted.
+
+        Args:
+            prompt_type: Type of prompt to get.
+            **kwargs: Variables to substitute in the prompt.
+
+        Returns:
+            The prompt with variables substituted.
+        """
+        content = self.prompts.get(prompt_type, "")
+
+        # Always include language in kwargs if not provided
+        if "language" not in kwargs:
+            kwargs["language"] = self.language
+
+        if content and kwargs:
+            template = Template(content)
+            content = template.safe_substitute(kwargs)
+
+        return content
+
+    def update_prompt(self, prompt_type: PromptType, **common_kwargs) -> None:
+        """
+        Update a prompt with common variables substituted.
+
+        Args:
+            **common_kwargs: Common variables to substitute in all prompts.
+
+        Returns:
+            Dictionary mapping prompt names to prompt content.
+        """
+        self.prompts[prompt_type] = self.get_prompt(prompt_type, **common_kwargs)
+
+    def update_prompts(self, **common_kwargs) -> None:
+        """
+        Update all prompts with common variables substituted.
+
+        Args:
+            **common_kwargs: Common variables to substitute in all prompts.
+
+        Returns:
+            Dictionary mapping prompt names to prompt content.
+        """
+        for prompt_type in PromptType:
+            self.prompts[prompt_type] = self.get_prompt(prompt_type, **common_kwargs)
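PromptManager is a singleton that loads the XML prompt files once and substitutes ${...} placeholders via string.Template, so typical usage is only a few lines. A hedged sketch (placeholder names other than language depend on what each XML prompt actually declares):

# Sketch: rendering a system prompt through the new PromptManager.
# ${language} is filled in automatically from the manager's language; any other
# ${...} placeholders must match what the XML prompt files actually declare.
from neuralnoise.prompt_manager import PromptManager, PromptType

pm = PromptManager(language="English")
editor_prompt = pm.get_prompt(PromptType.EDITOR)
print(editor_prompt[:200])

# Because of the __new__/_initialized guard, constructing it again returns the
# same instance with the first configuration.
assert PromptManager() is pm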

src/neuralnoise/prompts/content_analyzer.system.xml

Lines changed: 1 addition & 19 deletions
@@ -7,27 +7,9 @@
     <instruction>Remember to create a final section with conclusions and podcast wrap-up.</instruction>
     <instruction>Create sections that cover the main points and arguments of the content.</instruction>
     <instruction>The user will write the content in the XML tag named <![CDATA[ <content> ... </content> ]]></instruction>
-    <instruction>If there are multiple content documents, you'll receive them enclosed individually
-      in an XML tag named <![CDATA[ <document> ... </document> ]]></instruction>
   </instructions>
   <output-format>
-    Provide your analysis in JSON format that conforms to the following TypeScript interface:
-
-    <![CDATA[
-    interface ContentAnalysis {
-      title: string;
-      summary: string;
-      keyPoints: string[];
-      tone: string;
-      targetAudience: string;
-      potentialSegments: {
-        topic: string;
-        duration: number;
-        discussionPoints: string[];
-      }[];
-      controversialTopics: string[];
-    }
-    ]]>
+    Provide your analysis in JSON format that conforms to the ContentAnalysis schema.
   </output-format>
   <language>
     ${language}
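The prompt now defers to "the ContentAnalysis schema" instead of inlining a TypeScript interface. That schema is not shown in this commit, but judging from the removed interface it would presumably be a Pydantic model along these lines (a hypothetical reconstruction; the actual class in the repo may use different field names or nesting):

# Hypothetical Pydantic reconstruction of the ContentAnalysis schema referenced
# by the prompt, mirroring the TypeScript interface removed in this diff.
from pydantic import BaseModel


class PotentialSegment(BaseModel):
    topic: str
    duration: float  # the removed interface only said "number"; units are unspecified
    discussion_points: list[str]


class ContentAnalysis(BaseModel):
    title: str
    summary: str
    key_points: list[str]
    tone: str
    target_audience: str
    potential_segments: list[PotentialSegment]
    controversial_topics: list[str]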

src/neuralnoise/prompts/editor.system.xml

Lines changed: 2 additions & 7 deletions
@@ -6,7 +6,6 @@

   <instructions>
     <instruction>Evaluate structure, depth, transitions, and dialogue naturalness</instruction>
-    <instruction>Limit iterations to 2 per generated section</instruction>
    <instruction>Ensure natural conversation flow</instruction>
     <instruction>Avoid formal introductions/conclusions for sections</instruction>
     <instruction>Encourage quick interactions and questions between speakers</instruction>
@@ -15,21 +14,17 @@
     <instruction>Content flow and engagement: make sure to not talk about the last topic in the the
       introductions. Engage the user introducing the topics slowly</instruction>
     <instruction>Ask the ScriptGeneratorAgent to generate a few more segments with reactions or
-      questions if
-      needed.</instruction>
+      questions if needed.</instruction>
   </instructions>

   <output-format>
     Provide concise editing suggestions.
-    Alternatively, if the script is approved, conclude with 'EDITOR-OK'.
   </output-format>

   <important-notes>
-    <important> Only the EditorAgent can write "EDITOR-OK"</important>
     <important>Focus on the latest script version from the ScriptGeneratorAgent</important>
     <important>If you provide editing suggestions, the ScriptGeneratorAgent will generate a new
-      script based on your suggestions. Don't say EDITOR-OK in this case.</important>
-    <important>PlannerAgent proceeds to the next section after "EDITOR-OK"</important>
+      script based on your suggestions.</important>
   </important-notes>
   <language>
     ${language}
