108 lines
2.6 KiB
Elixir
108 lines
2.6 KiB
Elixir
defmodule HAHandler.Cluster do
  @moduledoc """
  Keeps the local node connected to the configured handler instances.

  The server state is the list of configured instance node names. When
  distribution is enabled, the server subscribes to node up/down
  notifications and periodically tries to connect to every configured
  instance that is not yet part of the cluster.
  """

  use GenServer

  require Logger

  # How long (in ms) to wait between each check/decision-making round.
  @refresh 30_000

  ## Client API

  @doc """
  Starts the cluster server, registered locally under the module name.

  `opts` is handed to `init/1` unchanged and is used as the list of
  configured handler instances (node names).
  """
  @spec start_link([node()]) :: GenServer.on_start()
  def start_link(opts) do
    GenServer.start_link(__MODULE__, opts, name: __MODULE__)
  end

  @doc """
  Returns one details map per known handler instance.

  Known instances are the local node, every currently connected node and
  every configured instance (duplicates removed). Each instance is queried
  in turn with a `:get_details` call:

    * if the call returns a map, that map is returned with `status: :up`;
    * if the call exits (node unreachable, server not running, timeout), a
      placeholder map with `:unknown` fields and `status: :down` is returned;
    * if the call returns anything that is not a map, a warning is logged and
      a placeholder map with `:unknown` fields and `status: :up` is returned.
  """
  @spec get_instance_details() :: [map()]
  def get_instance_details do
    configured = GenServer.call(__MODULE__, :get_instances)

    [Node.self() | Node.list()]
    |> Enum.concat(configured)
    |> Enum.uniq()
    |> Enum.map(&fetch_details/1)
  end

  ## Server callbacks

  @impl true
  def init(instances) do
    if Node.alive?() do
      Logger.info("Distribution/clustering is ENABLED.")
      Logger.info("Current handler instance is: #{Node.self()}")
      Logger.info("Configured handler instances: #{inspect(instances)}")

      # Subscribe to {:nodeup, node} / {:nodedown, node} messages.
      :net_kernel.monitor_nodes(true)

      # Kick off the one and only periodic sync chain.
      send(self(), :sync)
    else
      Logger.warning("Distribution is DISABLED - skipping clustering logic")
    end

    {:ok, instances}
  end

  @impl true
  def handle_info(:sync, instances) do
    connect_missing(instances)

    # Only this clause re-arms the timer, so exactly one periodic chain exists.
    Process.send_after(self(), :sync, @refresh)

    {:noreply, instances}
  end

  def handle_info({:nodedown, node}, instances) do
    Logger.warning("Node #{node} went down.")

    {:noreply, instances}
  end

  def handle_info({:nodeup, node}, instances) do
    Logger.info("Node #{node} came up.")

    # Try to reach the remaining instances right away, but do NOT go through
    # the :sync message: every handled :sync schedules another one, so doing
    # that here would start an additional, never-ending timer chain for each
    # node-up event.
    connect_missing(instances)

    {:noreply, instances}
  end

  @impl true
  def handle_call(:get_details, _from, instances) do
    {uptime_ms, _since_last_call} = :erlang.statistics(:wall_clock)

    local_details = %{
      node: Node.self(),
      otp_app: HAHandler.otp_app(),
      version: HAHandler.version(),
      # Wall-clock uptime converted from milliseconds to (rounded) minutes.
      uptime: round(uptime_ms / 1_000 / 60),
      env: HAHandler.env()
    }

    {:reply, local_details, instances}
  end

  def handle_call(:get_instances, _from, instances) do
    {:reply, instances, instances}
  end

  ## Helpers

  # Attempts to connect to every configured instance that is neither the local
  # node nor already connected, logging the outcome of each attempt.
  defp connect_missing(instances) do
    current_network = [Node.self() | Node.list()]

    instances
    |> Enum.reject(&(&1 in current_network))
    |> Enum.each(fn node_name ->
      case Node.connect(node_name) do
        true ->
          Logger.info("Connected to handler instance #{node_name}")

        _ ->
          Logger.warning("Could not connect to handler instance #{node_name}")
      end
    end)
  end

  # Queries a single instance for its details; never raises on a bad reply.
  defp fetch_details(node) do
    case GenServer.call({__MODULE__, node}, :get_details) do
      %{} = details ->
        Map.put(details, :status, :up)

      other ->
        # The instance answered, but not with the expected map.
        Logger.warning(
          "Unexpected details received from handler instance #{node}: #{inspect(other)}"
        )

        placeholder_details(node, :up)
    end
  catch
    :exit, _reason -> placeholder_details(node, :down)
  end

  # Details entry used when an instance could not provide usable details.
  defp placeholder_details(node, status) do
    %{
      node: node,
      otp_app: :unknown,
      version: :unknown,
      uptime: :unknown,
      env: :unknown,
      status: status
    }
  end
end
|