@@ -17,114 +17,120 @@ class Episode:
1717 tracks : str
1818 links : str
1919
# Build index.html: fetch the RSS feed, scrape each episode's tracklist out
# of the site's client-side JS bundle, and render everything with Jinja2.

episodes = []

# Always fetch the RSS feed to get the latest episodes; this is the safest
# way to ensure we have the most up-to-date information.
feed = feedparser.parse("https://musicforprogramming.net/rss.xml")
print(len(feed.entries))

for entry in feed.entries:
    episodes.append(Episode(
        title=entry.title,
        link=entry.link,
        pubDate=entry.published,
        guid=entry.guid,
        tracks="",
        links="",
    ))

# Fetch the tracklist for each episode from the latest client script.
# This is the only known way to get the tracklists, but it is not
# guaranteed to keep working if the site changes how it is built.
r = requests.get("https://musicforprogramming.net/latest/")
client_script_start = r.text.find("/client/client")
client_script_end = r.text[client_script_start:].find('";s.type')
cur_client_script = r.text[client_script_start:client_script_start + client_script_end]

r = requests.get("https://musicforprogramming.net" + cur_client_script)
data = r.text
json_start = data.find("const Xt")
json_end = data.find("function Qt(t)")
json_data = data[json_start:json_end].replace("const Xt=[", "").replace("];", "")

json_data_regex = r"\{(.*?)\}"
links_regex = r',"links:.*$'
links_http_regex = r'">(.*?)</a>'

json_objects = [o for o in re.findall(json_data_regex, json_data) if 'type:"episode"' in o]

n = len(json_objects)

for json_obj in json_objects:
    # Quote the bare keys of the minified JS object literal so it is
    # closer to real JSON before slicing fields out of it.
    scrubbed = (json_obj.replace('",', '","')
                        .replace(':"', '":"')
                        .replace("{slug", '{"slug')
                        .replace(',"order:', ',"order":')
                        .replace(',title"', ',"title"')
                        .replace("special:!0", 'special":"!0')
                        .replace("!0,file", '!0","file'))
    print(scrubbed)

    links_removed = re.sub(links_regex, " }", scrubbed)
    print(links_removed)

    tracklist_start = links_removed.find('"tracklist":"')
    tracklist_end = links_removed.find('" }')
    print(tracklist_start, tracklist_end)

    # The tracklist is embedded as an HTML string; strip the literal
    # \n / \t escapes and split on <br> (last element is empty).
    tracks = (links_removed[tracklist_start:tracklist_end]
              .replace('"tracklist":"', '')
              .replace('\\n', '')
              .replace('\\t', '')
              .strip()
              .split('<br>')[:-1])
    print("Tracks:", tracks)

    link_matches = re.findall(links_http_regex, scrubbed[scrubbed.find('"links:'):])
    for link in link_matches:
        print("Link:", link)

    # NOTE(review): this assumes the client script lists episodes in
    # reverse feed order — consume the RSS-derived list from the end.
    episode = episodes[n - 1]
    episode.tracks = json.dumps(tracks)
    episode.links = json.dumps(link_matches)

    n -= 1
    print("\n\n")

print("Episodes:", episodes)

environment = Environment(loader=FileSystemLoader("templates/"), autoescape=select_autoescape())
template = environment.get_template("mfp-template-index.html")
output = template.render(episodes=episodes)

with open("index.html", "w") as f:
    f.write(output)
def _fetch_feed_episodes():
    """Fetch the RSS feed and return Episode records in feed order.

    Always fetch the RSS feed to get the latest episodes; this is the
    safest way to ensure we have the most up-to-date information.
    """
    feed = feedparser.parse("https://musicforprogramming.net/rss.xml")
    print(len(feed.entries))
    return [
        Episode(
            title=entry.title,
            link=entry.link,
            pubDate=entry.published,
            guid=entry.guid,
            tracks="",
            links="",
        )
        for entry in feed.entries
    ]


def _fetch_client_script():
    """Locate and download the site's current client JS bundle, as text.

    This is the only known way to get the tracklists, but it is not
    guaranteed to keep working if the site changes how it is built.

    Raises:
        RuntimeError: if the client script path cannot be found.
    """
    # timeout keeps the script from hanging forever on a stalled connection
    r = requests.get("https://musicforprogramming.net/latest/", timeout=30)
    start = r.text.find("/client/client")
    if start == -1:
        raise RuntimeError("client script path not found on /latest/ page")
    end = r.text[start:].find('";s.type')
    if end == -1:
        raise RuntimeError("client script path terminator not found")
    script_path = r.text[start:start + end]

    r = requests.get("https://musicforprogramming.net" + script_path, timeout=30)
    return r.text


def _parse_episode_object(json_obj):
    """Extract (tracks, links) from one minified episode object literal.

    Returns:
        tuple[list[str], list[str]]: the track names and the link texts.
    """
    # Quote the bare keys so the object literal is closer to real JSON
    # before slicing fields out of it.
    scrubbed = (
        json_obj.replace('",', '","')
        .replace(':"', '":"')
        .replace("{slug", '{"slug')
        .replace(',"order:', ',"order":')
        .replace(',title"', ',"title"')
        .replace("special:!0", 'special":"!0')
        .replace("!0,file", '!0","file')
    )

    # Drop the trailing links blob so the tracklist slice below cannot
    # accidentally run into it.
    links_removed = re.sub(r',"links:.*$', " }", scrubbed)

    # The tracklist is embedded as an HTML string; strip the literal
    # \n / \t escapes and split on <br> (last element is empty).
    tracklist_start = links_removed.find('"tracklist":"')
    tracklist_end = links_removed.find('" }')
    tracks = (
        links_removed[tracklist_start:tracklist_end]
        .replace('"tracklist":"', "")
        .replace("\\n", "")
        .replace("\\t", "")
        .strip()
        .split("<br>")[:-1]
    )

    links = re.findall(r'">(.*?)</a>', scrubbed[scrubbed.find('"links:'):])
    return tracks, links


def main():
    """Build index.html from the RSS feed plus tracklists scraped from the client JS."""
    episodes = _fetch_feed_episodes()

    data = _fetch_client_script()
    json_start = data.find("const Xt")
    json_end = data.find("function Qt(t)")
    if json_start == -1 or json_end == -1:
        raise RuntimeError("episode data array not found in client script")
    json_data = data[json_start:json_end].replace("const Xt=[", "").replace("];", "")

    json_objects = [
        o for o in re.findall(r"\{(.*?)\}", json_data) if 'type:"episode"' in o
    ]

    # NOTE(review): this assumes the client script lists episodes in reverse
    # feed order, so the RSS-derived list is consumed from the end.  Guard
    # against a count mismatch instead of raising IndexError (too many
    # scraped objects) or silently wrapping to a negative index.
    n = len(json_objects)
    for json_obj in json_objects:
        tracks, links = _parse_episode_object(json_obj)
        print("Tracks:", tracks)
        for link in links:
            print("Link:", link)

        if 0 < n <= len(episodes):
            episode = episodes[n - 1]
            episode.tracks = json.dumps(tracks)
            episode.links = json.dumps(links)
        else:
            print("Warning: no matching RSS entry for scraped episode", n)
        n -= 1
        print("\n\n")

    print("Episodes:", episodes)

    environment = Environment(
        loader=FileSystemLoader("templates/"), autoescape=select_autoescape()
    )
    template = environment.get_template("mfp-template-index.html")
    output = template.render(episodes=episodes)

    with open("index.html", "w") as f:
        f.write(output)


if __name__ == "__main__":
    main()
135+
136+
0 commit comments