From abcd3337ddd89849d4142c9141dd10918f7aeea6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timoth=C3=A9e=20Floure?= Date: Sun, 22 May 2022 14:30:44 +0200 Subject: [PATCH] Add minimal handler clustering logic --- lib/ha_handler.ex | 1 + lib/ha_handler/application.ex | 1 + lib/ha_handler/cluster.ex | 108 ++++++++++++++++++++ lib/ha_handler/control.ex | 2 +- lib/ha_handler/web/controller.ex | 4 +- lib/ha_handler/web/templates/index.html.eex | 27 ++++- mix.exs | 2 +- 7 files changed, 141 insertions(+), 4 deletions(-) create mode 100644 lib/ha_handler/cluster.ex diff --git a/lib/ha_handler.ex b/lib/ha_handler.ex index 345c045..cadca5c 100644 --- a/lib/ha_handler.ex +++ b/lib/ha_handler.ex @@ -13,6 +13,7 @@ defmodule HAHandler do def haproxy_socket, do: Application.get_env(@otp_app, :haproxy_socket) def pgsql_instances, do: Application.get_env(@otp_app, :pgsql_instances, []) def drbd_instances, do: Application.get_env(@otp_app, :drbd_instances, []) + def handler_instances, do: Application.get_env(@otp_app, :handler_instances, []) def acme_challenge_path, do: Application.get_env(@otp_app, :acme_challenge_path) def static_path(), do: Application.app_dir(@otp_app, "priv/static/") diff --git a/lib/ha_handler/application.ex b/lib/ha_handler/application.ex index baea618..f0523f8 100644 --- a/lib/ha_handler/application.ex +++ b/lib/ha_handler/application.ex @@ -14,6 +14,7 @@ defmodule HAHandler.Application do scheme: :http, plug: HAHandler.Web.Router, options: [port: HAHandler.http_port()]}, {HAHandler.PGSQL.Supervisor, HAHandler.pgsql_instances()}, {HAHandler.DRBD.Supervisor, HAHandler.drbd_instances()}, + {HAHandler.Cluster, HAHandler.handler_instances()}, {HAHandler.Control, []} ] diff --git a/lib/ha_handler/cluster.ex b/lib/ha_handler/cluster.ex new file mode 100644 index 0000000..615212a --- /dev/null +++ b/lib/ha_handler/cluster.ex @@ -0,0 +1,108 @@ +defmodule HAHandler.Cluster do + use GenServer + require Logger + + # How much do we wait (ms) between each 
check/decision-making round? + @refresh 30_000 + + def start_link(opts) do + GenServer.start_link(__MODULE__, opts, name: __MODULE__) + end + + @impl true + def init(instances) do + if Node.alive?() do + Logger.info("Distribution/clustering is ENABLED.") + Logger.info("Current handler instance is: #{Node.self()}") + Logger.info("Configured handler instances: #{inspect(instances)}") + :net_kernel.monitor_nodes(true) + + send(self(), :sync) + else + Logger.warning("Distribution is DISABLED - skipping clustering logic") + end + + {:ok, instances} + end + + @impl true + def handle_info(:sync, instances) do + current_network = Node.list() ++ [Node.self()] + + for node_name <- instances do + # Nothing to do if the node is already in our network/cluster. + if node_name not in current_network do + case Node.connect(node_name) do + true -> + Logger.info("Connected to handler instance #{node_name}") + _ -> + Logger.warning("Could not connect to handler instance #{node_name}") + end + end + end + + Process.send_after(self(), :sync, @refresh) + + {:noreply, instances} + end + + @impl true + def handle_info({:nodedown, node}, instances) do + Logger.warning("Node #{node} went down.") + + {:noreply, instances} + end + + @impl true + def handle_info({:nodeup, node}, instances) do + Logger.info("Node #{node} came up.") + + send(self(), :sync) + + {:noreply, instances} + end + + @impl true + def handle_call(:get_details, _from, instances) do + {uptime_ms, _} = :erlang.statistics(:wall_clock) + + local_details = %{ + node: Node.self(), + otp_app: HAHandler.otp_app, + version: HAHandler.version, + uptime: round(uptime_ms / 1_000 / 60), + env: HAHandler.env + } + + {:reply, local_details, instances} + end + + @impl true + def handle_call(:get_instances, _from, instances) do + {:reply, instances, instances} + end + + def get_instance_details() do + known_instances = [Node.self()] ++ Node.list() ++ GenServer.call(__MODULE__, :get_instances) + + known_instances + |> Enum.uniq() + |> 
Enum.map(fn node -> + try do + # FIXME: remote node could return garbage/another structure! + GenServer.call({__MODULE__, node}, :get_details) + |> Map.put(:status, :up) + catch + :exit, _err -> + %{ + node: node, + otp_app: :unknown, + version: :unknown, + uptime: :unknown, + env: :unknown, + status: :down + } + end + end) + end +end diff --git a/lib/ha_handler/control.ex b/lib/ha_handler/control.ex index 2d96f24..1724526 100644 --- a/lib/ha_handler/control.ex +++ b/lib/ha_handler/control.ex @@ -178,7 +178,7 @@ defmodule HAHandler.Control do process_pgsql() process_drbd() {:error, err} -> - Logger.error("Unable to fetch HAProxy state (#{inspect(err)}( - skipping control loop.") + Logger.error("Unable to fetch HAProxy state (#{inspect(err)}) - skipping control loop.") end # Schedule next round. diff --git a/lib/ha_handler/web/controller.ex b/lib/ha_handler/web/controller.ex index babf10a..68b3498 100644 --- a/lib/ha_handler/web/controller.ex +++ b/lib/ha_handler/web/controller.ex @@ -1,7 +1,7 @@ defmodule HAHandler.Web.Controller do import Plug.Conn - alias HAHandler.{HAProxy, PGSQL, DRBD} + alias HAHandler.{HAProxy, PGSQL, DRBD, Cluster} @template_dir "lib/ha_handler/web/templates" @index_template EEx.compile_file(Path.join(@template_dir, "index.html.eex")) @@ -20,11 +20,13 @@ defmodule HAHandler.Web.Controller do haproxy_stats = HAProxy.get_stats(hide_error: true) pgsql_stats = PGSQL.get_stats() drbd_stats = DRBD.get_stats() + handler_stats = Cluster.get_instance_details() assigns = [ haproxy_stats: haproxy_stats, pgsql_status: pgsql_stats, drbd_status: drbd_stats, + handler_status: handler_stats, hostname: hostname, otp_app: HAHandler.otp_app(), version: HAHandler.version(), diff --git a/lib/ha_handler/web/templates/index.html.eex b/lib/ha_handler/web/templates/index.html.eex index 047dce4..6225f06 100644 --- a/lib/ha_handler/web/templates/index.html.eex +++ b/lib/ha_handler/web/templates/index.html.eex @@ -25,7 +25,32 @@

Handler

- <%= otp_app %> v<%= version %> (<%= env %>) running on <%= hostname %> +

+ Local instance: <%= otp_app %> v<%= version %> (<%= env %>) running on <%= hostname %> +

+ + + + + + + + + + + + + <%= for instance <- handler_status do %> + + + + + + + + <% end %> + +
InstanceVersionEnvStatusUptime
<%= instance.node %><%= instance.version %><%= instance.env %><%= instance.status %><%= instance.uptime %>m

diff --git a/mix.exs b/mix.exs index 4a09566..aaf8a3c 100644 --- a/mix.exs +++ b/mix.exs @@ -4,7 +4,7 @@ defmodule HAHandler.MixProject do def project do [ app: :ha_handler, - version: "0.3.0", + version: "0.4.0", elixir: "~> 1.12", start_permanent: Mix.env() == :prod, deps: deps(),