# SEO_tools/SEO_StreamlitUI.py at main · thatssostefo/SEO_tools · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
import streamlit as st
import pandas as pd
import io
import os
from dotenv import load_dotenv
from docx import Document
from google import genai

# --- API Configuration ---
# load_dotenv() runs first so that a key kept in a local .env file is visible
# to the environment lookup below.
load_dotenv()

api_key = os.environ.get("GOOGLE_API_KEY")
if not api_key:
    # Fail fast: nothing in this app works without the Gemini credentials.
    raise ValueError("Please set the GOOGLE_API_KEY environment variable.")

# Single client instance shared by every generate_content call in this script.
client = genai.Client(api_key=api_key)

# -------------------- Utilities --------------------
def safe_filename(text: str):
    """Return *text* rewritten so it can be used as part of a file name.

    Alphanumeric characters, underscores and hyphens are kept. Every other
    character except the space becomes an underscore. Leading and trailing
    whitespace is then trimmed and the remaining spaces are turned into
    underscores as well.
    """
    kept_as_is = {" ", "_", "-"}
    cleaned = []
    for ch in text:
        if ch.isalnum() or ch in kept_as_is:
            cleaned.append(ch)
        else:
            cleaned.append("_")
    trimmed = "".join(cleaned).strip()
    return trimmed.replace(" ", "_")

def parse_model_csv_output(raw_text: str) -> pd.DataFrame:
    """Parse the CSV text returned by the model into a DataFrame.

    Blank lines and markdown code-fence lines (those starting with three
    backticks) are discarded. Parsing starts at the first line that mentions
    both "cluster" and "primary" (case-insensitive), which is taken to be the
    header row; if no such line exists, parsing starts at the first line.

    Args:
        raw_text: Raw text of the model response.

    Returns:
        The parsed table.

    Raises:
        ValueError: If *raw_text* is empty, whitespace-only, or contains
            nothing but code-fence lines.
        pandas.errors.ParserError: If the text cannot be parsed as CSV by
            either pandas engine.
    """
    if not raw_text or not raw_text.strip():
        raise ValueError("Empty model output")

    # Models sometimes wrap the CSV in a ``` fence even when told not to.
    # Without this filter the closing fence would be parsed as a data row.
    lines = [
        stripped
        for stripped in (ln.strip() for ln in raw_text.splitlines())
        if stripped and not stripped.startswith("```")
    ]
    if not lines:
        raise ValueError("Empty model output")

    # Skip any commentary the model put in front of the header row.
    header_index = next(
        (
            i
            for i, ln in enumerate(lines)
            if "cluster" in ln.lower() and "primary" in ln.lower()
        ),
        0,
    )
    csv_text = "\n".join(lines[header_index:])

    try:
        return pd.read_csv(io.StringIO(csv_text), skip_blank_lines=True, quotechar='"')
    except pd.errors.ParserError:
        # Retry once with the pure-Python engine before giving up.
        return pd.read_csv(
            io.StringIO(csv_text),
            sep=",",
            engine="python",
            skip_blank_lines=True,
            quotechar='"',
        )

def _split_markdown_heading(line: str) -> tuple:
    """Split a markdown-style heading line into ``(depth, text)``.

    ``depth`` is the number of leading ``#`` characters (1 to 6) when *line*
    consists of that marker, a space, and some text. For any other line the
    result is ``(0, line)``.
    """
    marker, _, rest = line.partition(" ")
    if marker and len(marker) <= 6 and set(marker) == {"#"} and rest.strip():
        return len(marker), rest.strip()
    return 0, line


def save_docx_formatted(article_text: str, keyword: str) -> io.BytesIO:
    """Render *article_text* as a .docx document held in memory.

    Layout rules:
      * The first non-empty line becomes the level-1 heading when it is at
        most 120 characters long (leading ``#`` markers are removed from it);
        otherwise the heading is ``"Article: <keyword>"`` and that line stays
        in the body.
      * Lines starting with ``#`` markers become headings one level deeper
        than the marker count (``#`` -> level 2, ``##`` -> level 3, ...).
      * Lines starting with ``- `` or ``* `` become "List Bullet" paragraphs.
      * Consecutive remaining lines are joined with spaces into a paragraph;
        a blank line ends the paragraph.

    Args:
        article_text: Article body as plain text or light markdown.
        keyword: Text used in the fallback heading.

    Returns:
        A BytesIO positioned at offset 0 that contains the saved document.
    """
    doc = Document()
    lines = [ln.rstrip() for ln in article_text.splitlines()]

    title_index = next((i for i, ln in enumerate(lines) if ln.strip()), None)
    first_nonempty = lines[title_index].strip() if title_index is not None else ""
    if first_nonempty and len(first_nonempty) <= 120:
        # Strip markdown markers so the title is not rendered as "# Title".
        _, title_text = _split_markdown_heading(first_nonempty)
        doc.add_heading(title_text, level=1)
        # Remove only the title line so it is not repeated in the body.
        del lines[title_index]
    else:
        doc.add_heading(f"Article: {keyword}", level=1)

    pending = []

    def flush_paragraph():
        # Emit the buffered text lines as one paragraph, then reset the buffer.
        if pending:
            doc.add_paragraph(" ".join(pending))
            pending.clear()

    for ln in lines:
        s = ln.strip()
        if not s:
            flush_paragraph()
            continue

        depth, heading_text = _split_markdown_heading(s)
        if depth:
            flush_paragraph()
            # Level 1 is reserved for the title, so body headings start at 2.
            doc.add_heading(heading_text, level=depth + 1)
            continue

        if s.startswith(("- ", "* ")):
            flush_paragraph()
            doc.add_paragraph(s[2:].strip(), style="List Bullet")
            continue

        pending.append(s)
    flush_paragraph()

    # Save to an in-memory buffer, not a file.
    buffer = io.BytesIO()
    doc.save(buffer)
    buffer.seek(0)
    return buffer

# -------------------- GUI --------------------
st.set_page_config(
    page_title="SEO Assistant with AI",
    page_icon="https://stefanomandola.com/wp-content/uploads/2025/10/seo-assistant-with-ai-logo-1.png",
    layout="centered",
    initial_sidebar_state="expanded",
)

# --- Session State Initialization ---
# Each key is seeded only when it is absent, so values written by the
# clustering and article sections below are never overwritten here.
defaults = {
    "cluster_df": None,
    "article_text": "",
    "article_title": "",
    "article_meta": "",
}
for key, value in defaults.items():
    if key in st.session_state:
        continue
    st.session_state[key] = value

st.title("👩🏻‍💻 SEO Assistant with Gemini AI")

# Short introduction shown directly under the page title.
st.markdown('''Keyword research and content creation can be slow and fragmented.
    This tool clusters keywords by **topic and intent**, letting you organize SEO topics or even Google Ads campaigns efficiently.
    From these clusters, you can generate **structured AI-powered article drafts** to support your content strategy.''')

# --- Sidebar ---
# One sidebar context holds everything except the contact links, emitted in
# the order they should appear.
with st.sidebar:
    st.logo(
        "https://stefanomandola.com/wp-content/uploads/2025/10/seo-assistant-with-ai-logo-1.png",
        size="large",
    )
    st.header("SEO automations")

    # In-page anchor links to the two tool sections.
    st.markdown("🗂️ [Keyword Clustering](#cluster-your-keywords)")
    st.markdown("📝 [Article Generation](#generate-your-article)")

    # Vertical spacer between the navigation links and the footer.
    st.markdown("<br><br>", unsafe_allow_html=True)

    # --- Sidebar footer ---
    st.warning(
        "AI results may be inaccurate. "
        "Review and adjust the output to ensure correctness, brand fit, and search intent alignment.",
        icon='⚠️',
    )
    st.markdown("---")
    st.markdown("**Find me on:**")

# Contact links, rendered last in the sidebar.
st.sidebar.markdown(
    """
    🔗 [Website](https://stefanomandola.com/)
    👨‍💻 [LinkedIn](https://www.linkedin.com/in/stefanomandola/)
    🛠️ [GitHub](https://github.com/thatssostefo)
    """,
    unsafe_allow_html=True
)

# --- Keyword Clustering ---

st.header("Cluster your keywords")

st.info("Automatically group your keywords by meaning and intent. "
        "You'll get a clean table showing your clusters that you can use "
        "for your content strategy, to create content pillars, or even to organize PPC paid campaigns. "
        "**Remember:** this is only part of the process. Manually review the clusters and adjust them as needed to ensure accuracy and relevance.",
        icon='ℹ️')
keywords_input = st.text_area("Enter your keywords", placeholder='''python for seo
automate seo with python
advanced python seo
...''',
    height=150)

cluster_btn = st.button("Cluster Keywords")

if cluster_btn:
    # One keyword per line; blank and whitespace-only lines are ignored.
    keywords = [k.strip() for k in keywords_input.splitlines() if k.strip()]
    if not keywords:
        st.error("Please enter at least one keyword.")
    else:
        contents = (
            "You are an SEO expert. Cluster the following keywords based on semantic similarity and search intent.\n\n"
            # The blank line keeps this instruction from running straight into the keyword list.
            "For each cluster, choose one representative keyword as 'Primary Keyword' and list all other related keywords in 'Related Keywords'.\n\n"
            f"Keywords: {', '.join(keywords)}.\n\n"
            "Return only the csv rows, no additional commentary, no markdown formatting, no code blocks. Include this column header as the first row:\n"
            "Cluster,Primary Keyword,Related Keywords,Keyword Intent\n"
        )
        with st.spinner("Clustering keywords..."):
            try:
                cluster_resp = client.models.generate_content(
                    model="gemini-2.5-flash",
                    contents=contents)
                # The response may carry no text at all; treat that as empty
                # output so the parser reports it as a ValueError.
                raw_text = (cluster_resp.text or "").strip()
                df = parse_model_csv_output(raw_text)
            except Exception as exc:
                # UI boundary: report the failure in the page instead of
                # crashing the script; any previous result stays in place.
                st.error(f"Keyword clustering failed: {exc}")
            else:
                st.session_state.cluster_df = df
                st.success(f"Clustering done. {len(df)} rows generated.")

# Always display the table if it exists in session_state
cluster_table = st.session_state.cluster_df
if cluster_table is not None:
    st.write("### Clustered keywords")
    st.dataframe(cluster_table)

    # With no target given, to_csv() returns the CSV text itself, which is
    # exactly what the download button needs.
    st.download_button(
        label="Download as CSV",
        data=cluster_table.to_csv(index=False),
        file_name="clustered_keywords.csv",
        mime="text/csv",
    )

st.divider()

# --- Article Generation ---

st.header("Generate your article")
st.info("Turn a topic into a structured SEO article with title, meta description, FAQs, and clean formatting. "
        "Insert your primary keyword, set your word count, and provide additional information if needed. "
        "You'll get a draft in .docx format ready for review. "
        "**Remember:** use the output as a starting point to organize your ideas, "
        "but make sure to add your expertise and POV to the topic, in accordance with Google's EEAT guidelines.",
        icon='ℹ️')

primary_keyword = st.text_input("Primary keyword", placeholder="python for seo")
secondary_keywords = st.text_area("Secondary keywords (optional, comma-separated)", placeholder="python scripts for seo, python for seo projects, python seo tools, ...")
word_count = st.selectbox("Word count", ["500", "1000", "1500", "2000", "2500", "3000"], index=0)
additional_instructions = st.text_area("Additional instructions", height=200, placeholder='''Include answers to the following FAQs:
- Can Python be used for SEO?
- Why learn Python as an SEO?
- Is Python good for web scraping?
...''')
generate_btn = st.button("Generate Article")

if generate_btn:
    topic = primary_keyword.strip()
    if not topic:
        st.error("Please enter a primary keyword.")
    else:
        wc = int(word_count)

        # Both fields are optional: leave their prompt lines out entirely when
        # empty instead of sending a dangling "...where relevant: ." line.
        related = secondary_keywords.strip()
        extra = additional_instructions.strip()
        related_line = (
            f'Use or include these related keywords where relevant: {related}.\n'
            if related else ""
        )
        extra_line = f'Additional instructions: {extra}\n' if extra else ""

        article_prompt = (
            f'You are an SEO expert and content writer.\n'
            f'Write an article of about {wc} words about "{topic}".\n'
            f'{related_line}'
            f'{extra_line}'
            'Requirements:\n'
            "- Use BLUF (most important info first)\n"
            "- Include a short bullet list of key takeaways near the top\n"
            "- Use headings (H2/H3). Structure headlines as questions where appropriate.\n"
            "- Keep sentences short and readable\n"
            "- Insert a table where to better show clearly complex concepts\n"
            "- Format output as plain text (no markdown symbols), with headings on their own lines.\n"
        )
        title_meta_prompt = (
            f'You are an SEO expert. For the article about "{topic}" produce two outputs only:\n'
            "1) A SEO title (max 60 characters).\n"
            "2) A meta description (max 160 characters).\n"
            "Return them on separate lines, first the title, then the meta description. Do not add extra commentary.\n"
        )

        with st.spinner("Generating article..."):
            try:
                # Article body
                article_resp = client.models.generate_content(
                    model="gemini-2.5-flash",
                    contents=article_prompt)

                # SEO title + meta description
                title_meta_resp = client.models.generate_content(
                    model="gemini-2.5-flash",
                    contents=title_meta_prompt)
            except Exception as exc:
                # UI boundary: report the failure in the page instead of
                # crashing the script; any previous article stays in place.
                st.error(f"Article generation failed: {exc}")
            else:
                # A response may carry no text at all; treat that as empty.
                article_text = (article_resp.text or "").strip()
                tm_lines = [
                    ln.strip()
                    for ln in (title_meta_resp.text or "").splitlines()
                    if ln.strip()
                ]
                title = tm_lines[0] if tm_lines else ""
                meta = tm_lines[1] if len(tm_lines) > 1 else ""

                if not article_text:
                    st.error("The model returned an empty article. Please try again.")
                else:
                    st.session_state.article_text = article_text
                    st.session_state.article_title = title
                    st.session_state.article_meta = meta
                    st.success("Article generated!")

# --- Article Preview & Download ---
if st.session_state.article_text:
    st.subheader("Article preview")

    # Keep the widgets' return values: they hold whatever the user typed in
    # the preview, so the downloaded file reflects those edits rather than
    # the untouched model output stored in session state.
    edited_title = st.text_input("SEO Title", value=st.session_state.article_title)
    st.text_input("Meta Description", value=st.session_state.article_meta)
    edited_article = st.text_area("Article", value=st.session_state.article_text, height=400)

    docx_buffer = save_docx_formatted(edited_article, edited_title)

    # Fall back to a fixed stem so an empty title does not produce
    # "seo_article_.docx".
    file_stem = safe_filename(edited_title) or "draft"

    st.download_button(
        "Download DOCX",
        data=docx_buffer,
        file_name=f"seo_article_{file_stem}.docx",
        mime="application/vnd.openxmlformats-officedocument.wordprocessingml.document"
    )