Skip to content

Presence stops working after ~1 week #179

Open
@flupke

Description

Environment

  • Elixir version (elixir -v): 1.14.0
  • Phoenix version (mix deps): 1.7.7
  • Operating system: ubuntu:jammy-20230126

Actual behavior

After the cluster has been running for a long time (roughly a week), Presence stops working:

** (SystemLimitError) a system limit has been reached due to errors at the given arguments:
  * 2nd argument: not a valid match specification
    (stdlib 4.3.1) :ets.select(MultiplayerBackendWeb.Presence_shard0, [{{{"project:ubnGwd32md3iDZSwO4mTRz1T", :_, :"$1"}, :"$2", {:"$3", :_}}, [not: {:or, {:"=:=", :"$3", {{:"multiplayer-backend-prod2@...", 1692810649862196}}}, {:or, {:"=:=", :"$3", {{:"multiplayer-backend-prod2@...", 1692593405495489}}}, {:or, {:"=:=", :"$3", {{:"multiplayer-backend-prod2@...", 1693026009636719}}}, {:or, {:"=:=", :"$3", {{:"multiplayer-backend-prod2@...", 1693380369590663}}}, {:or, {:"=:=", :"$3", {{:"multiplayer-backend-prod2@...", 1693463743616698}}}, {:or, {:"=:=", :"$3", {{:"multiplayer-backend-prod2@...", 1692822408764548}}}, {:or, {:"=:=", :"$3", {{:"multiplayer-backend-prod2@...", 1693580591386986}}}, {:or, {:"=:=", :"$3", {{:"multiplayer-backend-prod2@...", 1692512764076126}}}, {:or, {:"=:=", :"$3", {{:"multiplayer-backend-prod2@...", 1692867520504760}}}, {:or, {:"=:=", :"$3", {{:"multiplayer-backend-prod2@...", 1692872751130110}}}, {:or, {:"=:=", :"$3", {{:"multiplayer-backend-prod2@...", 1693060187174415}}}, {:or, {:"=:=", :"$3", {{:"multiplayer-backend-prod2@...", 1693408737896420}}}, {:or, {:"=:=", :"$3", {{:"multiplayer-backend-prod2@...", 1692744903811224}}}, {:or, {:"=:=", :"$3", {{:"multiplayer-backend-prod2@...", ...}}}, {:or, {:"=:=", :"$3", ...}, {:or, ...}}}}}}}}}}}}}}}}], [{{:"$1", :"$2"}}]}])
    (phoenix_pubsub 2.1.3) lib/phoenix/tracker/state.ex:167: Phoenix.Tracker.State.tracked_values/3
    (phoenix 1.7.7) lib/phoenix/presence.ex:532: Phoenix.Presence.list/2
    (multiplayer_backend 0.1.0) lib/multiplayer_backend_web/channels/project_channel.ex:56: MultiplayerBackendWeb.ProjectChannel.handle_info/2
    (phoenix 1.7.7) lib/phoenix/channel/server.ex:354: Phoenix.Channel.Server.handle_info/2
    (stdlib 4.3.1) gen_server.erl:1123: :gen_server.try_dispatch/4
    (stdlib 4.3.1) gen_server.erl:1200: :gen_server.handle_msg/6
    (stdlib 4.3.1) proc_lib.erl:240: :proc_lib.init_p_do_apply/3
Last message: :after_join

It fails here:

# Handles the :after_join message: snapshots the presence list, makes sure the
# project coordinator is started and monitored, tracks this peer, and then
# pushes the snapshot to the client.
def handle_info(:after_join, socket) do
  # The snapshot is taken before the Presence.track/3 call further down.
  current_presences = Presence.list(socket)  # <------------------------

  {:ok, coordinator} =
    ProjectCoordinator.ensure_started(
      socket.assigns.project_id,
      presence: %{
        topic: socket.topic,
        list: current_presences,
        pubsub_server: socket.pubsub_server
      }
    )

  # Store the coordinator pid together with a monitor reference on it, so the
  # channel can be killed if the coordinator dies.
  socket =
    socket
    |> assign(:project_coordinator_pid, coordinator)
    |> assign(:coordinator_monitor_ref, Process.monitor(coordinator))

  # Assertive match: anything other than {:ok, _} from track/3 crashes here.
  {:ok, _tracked} =
    Presence.track(
      socket,
      socket.assigns.peer_id,
      %{
        joined_at: inspect(System.system_time(:second)),
        online_at: inspect(System.system_time(:second)),
        user_data: remove_token_from_presence(socket.assigns.user_data)
      }
    )

  push(socket, "presence_state", current_presences)
  get_project(socket)

  {:noreply, socket}
end

Restarting the instances of the cluster temporarily fixes the issue, but it comes back after a week or two.

Expected behavior

It should not crash.

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions