defmodule HAHandler.Cluster do
  @moduledoc """
  Keeps this handler instance connected to the other configured handler
  instances and exposes per-instance details.

  The server state is the list of configured instance node names, exactly as
  given to `start_link/1`. When distribution is enabled, the server subscribes
  to node up/down notifications and periodically tries to connect to every
  configured instance that is not yet part of the local cluster.
  """

  use GenServer

  require Logger

  # How long do we wait (ms) between each check/decision-making round?
  @refresh 30_000

  @doc """
  Starts the cluster server, registered locally under the module name.

  `opts` is used as-is as the list of configured instance node names.
  """
  @spec start_link([node()]) :: GenServer.on_start()
  def start_link(opts) do
    GenServer.start_link(__MODULE__, opts, name: __MODULE__)
  end

  @impl true
  def init(instances) do
    if Node.alive?() do
      Logger.info("Distribution/clustering is ENABLED.")
      Logger.info("Current handler instance is: #{Node.self()}")
      Logger.info("Configured handler instances: #{inspect(instances)}")

      :net_kernel.monitor_nodes(true)
      send(self(), :sync)
    else
      Logger.warning("Distribution is DISABLED - skipping clustering logic")
    end

    {:ok, instances}
  end

  # Periodic round: connect to missing instances, then schedule the next round.
  # This clause is the ONLY place that re-arms the timer, so exactly one
  # periodic chain exists for the lifetime of the server.
  @impl true
  def handle_info(:sync, instances) do
    connect_missing_instances(instances)
    Process.send_after(self(), :sync, @refresh)

    {:noreply, instances}
  end

  def handle_info({:nodedown, node}, instances) do
    Logger.warning("Node #{node} went down.")

    {:noreply, instances}
  end

  def handle_info({:nodeup, node}, instances) do
    Logger.info("Node #{node} came up.")

    # Try to reach the remaining instances right away, but do NOT go through
    # the :sync message: every :sync re-arms the periodic timer, so sending it
    # here would start one additional timer chain per node-up event.
    connect_missing_instances(instances)

    {:noreply, instances}
  end

  # Unexpected messages must not crash the server with a FunctionClauseError.
  def handle_info(message, instances) do
    Logger.debug("Ignoring unexpected message: #{inspect(message)}")

    {:noreply, instances}
  end

  @impl true
  def handle_call(:get_details, _from, instances) do
    # :wall_clock reports milliseconds; the uptime is exposed in whole minutes.
    {uptime_ms, _} = :erlang.statistics(:wall_clock)

    local_details = %{
      node: Node.self(),
      otp_app: HAHandler.otp_app(),
      version: HAHandler.version(),
      uptime: round(uptime_ms / 1_000 / 60),
      env: HAHandler.env()
    }

    {:reply, local_details, instances}
  end

  def handle_call(:get_instances, _from, instances) do
    {:reply, instances, instances}
  end

  @doc """
  Returns one details map per known instance.

  Known instances are the local node, the currently connected nodes and the
  configured instances (duplicates removed). Each entry contains the keys
  `:node`, `:otp_app`, `:version`, `:uptime`, `:env` and `:status`.

  An instance whose server cannot be called, or which replies with something
  that is not a map, is reported with `status: :down` and `:unknown` for
  every other field except `:node`.
  """
  @spec get_instance_details() :: [map()]
  def get_instance_details() do
    known_instances =
      [Node.self()] ++ Node.list() ++ GenServer.call(__MODULE__, :get_instances)

    known_instances
    |> Enum.uniq()
    |> Enum.map(&fetch_details/1)
  end

  # Attempts to connect to every configured instance that is not already part
  # of our network/cluster, logging the outcome of each attempt.
  defp connect_missing_instances(instances) do
    current_network = [Node.self() | Node.list()]

    for node_name <- instances, node_name not in current_network do
      case Node.connect(node_name) do
        true -> Logger.info("Connected to handler instance #{node_name}")
        _ -> Logger.warning("Could not connect to handler instance #{node_name}")
      end
    end

    :ok
  end

  # Asks the server on `node` for its details. A remote node could return
  # garbage/another structure, so the reply shape is checked before use.
  defp fetch_details(node) do
    case GenServer.call({__MODULE__, node}, :get_details) do
      details when is_map(details) ->
        Map.put(details, :status, :up)

      other ->
        Logger.warning(
          "Handler instance #{node} returned unexpected details: #{inspect(other)}"
        )

        unavailable_details(node)
    end
  catch
    :exit, _reason -> unavailable_details(node)
  end

  # Placeholder entry for an instance we could not get valid details from.
  defp unavailable_details(node) do
    %{
      node: node,
      otp_app: :unknown,
      version: :unknown,
      uptime: :unknown,
      env: :unknown,
      status: :down
    }
  end
end