Add minimal handler clustering logic
continuous-integration/drone/push Build is passing Details

This commit is contained in:
Timothée Floure 2022-05-22 14:30:44 +02:00
parent 77ebea3746
commit abcd3337dd
Signed by: tfloure
GPG Key ID: 4502C902C00A1E12
7 changed files with 141 additions and 4 deletions

View File

@ -13,6 +13,7 @@ defmodule HAHandler do
# Value stored under :haproxy_socket in the application environment (nil when unset).
def haproxy_socket, do: Application.get_env(@otp_app, :haproxy_socket)
# Configured PostgreSQL instances; defaults to an empty list when unset.
def pgsql_instances, do: Application.get_env(@otp_app, :pgsql_instances, [])
# Configured DRBD instances; defaults to an empty list when unset.
def drbd_instances, do: Application.get_env(@otp_app, :drbd_instances, [])
# Configured handler instances (handed to HAHandler.Cluster at startup);
# defaults to an empty list when unset.
def handler_instances, do: Application.get_env(@otp_app, :handler_instances, [])
# Value stored under :acme_challenge_path in the application environment (nil when unset).
def acme_challenge_path, do: Application.get_env(@otp_app, :acme_challenge_path)
# Path of the "priv/static/" directory inside the application directory.
def static_path(), do: Application.app_dir(@otp_app, "priv/static/")

View File

@ -14,6 +14,7 @@ defmodule HAHandler.Application do
scheme: :http, plug: HAHandler.Web.Router, options: [port: HAHandler.http_port()]},
{HAHandler.PGSQL.Supervisor, HAHandler.pgsql_instances()},
{HAHandler.DRBD.Supervisor, HAHandler.drbd_instances()},
{HAHandler.Cluster, HAHandler.handler_instances()},
{HAHandler.Control, []}
]

108
lib/ha_handler/cluster.ex Normal file
View File

@ -0,0 +1,108 @@
defmodule HAHandler.Cluster do
  @moduledoc """
  Keeps the local handler instance connected to its configured peers.

  The process state is the list of configured handler instances (node names).
  When Erlang distribution is enabled, the process subscribes to node up/down
  events and tries to connect to every configured instance that is not part
  of the current network, once at startup and then every `@refresh`
  milliseconds.

  When distribution is disabled the process still starts (so that
  `get_instance_details/0` keeps working) but never attempts any connection.
  """

  use GenServer

  require Logger

  # How much do we wait (ms) between each check/decision-making round?
  @refresh 30_000

  ## Client API

  @doc """
  Starts the cluster process, registered locally under the module name.

  `opts` is the list of handler instances (node names) to stay connected to.
  """
  def start_link(opts) do
    GenServer.start_link(__MODULE__, opts, name: __MODULE__)
  end

  @doc """
  Returns one details map per known handler instance.

  Known instances are the local node, the currently connected nodes and the
  configured instances (duplicates removed). Each map has the keys `:node`,
  `:otp_app`, `:version`, `:uptime` (minutes), `:env` and `:status`:

    * `:up` - the instance answered with a well-formed details map;
    * `:down` - the call to the instance exited (unreachable node, process
      not running, timeout, ...);
    * `:unknown` - the instance answered with something that is not a
      well-formed details map.

  For `:down` and `:unknown` instances every field except `:node` and
  `:status` is `:unknown`.
  """
  def get_instance_details() do
    known_instances = [Node.self()] ++ Node.list() ++ GenServer.call(__MODULE__, :get_instances)

    known_instances
    |> Enum.uniq()
    |> Enum.map(&fetch_details/1)
  end

  ## Server callbacks

  @impl true
  def init(instances) do
    if Node.alive?() do
      Logger.info("Distribution/clustering is ENABLED.")
      Logger.info("Current handler instance is: #{Node.self()}")
      Logger.info("Configured handler instances: #{inspect(instances)}")

      :net_kernel.monitor_nodes(true)
      send(self(), :sync)
    else
      Logger.warning("Distribution is DISABLED - skipping clustering logic")
    end

    {:ok, instances}
  end

  @impl true
  def handle_info(:sync, instances) do
    connect_missing(instances)

    # This is the only place where the next periodic round is scheduled, so
    # exactly one timer chain exists for the lifetime of the process.
    Process.send_after(self(), :sync, @refresh)

    {:noreply, instances}
  end

  def handle_info({:nodedown, node}, instances) do
    Logger.warning("Node #{node} went down.")

    {:noreply, instances}
  end

  def handle_info({:nodeup, node}, instances) do
    Logger.info("Node #{node} came up.")

    # Try to reach the remaining instances right away. This deliberately does
    # NOT go through the :sync message: :sync re-arms the periodic timer, so
    # sending it on every nodeup would start an additional timer chain each
    # time a node joins.
    connect_missing(instances)

    {:noreply, instances}
  end

  @impl true
  def handle_call(:get_details, _from, instances) do
    {uptime_ms, _} = :erlang.statistics(:wall_clock)

    local_details = %{
      node: Node.self(),
      otp_app: HAHandler.otp_app(),
      version: HAHandler.version(),
      # Wall-clock time reported by the runtime, converted from ms to minutes.
      uptime: round(uptime_ms / 1_000 / 60),
      env: HAHandler.env()
    }

    {:reply, local_details, instances}
  end

  def handle_call(:get_instances, _from, instances) do
    {:reply, instances, instances}
  end

  ## Helpers

  # Tries to connect to every configured instance that is neither the local
  # node nor already connected, logging the outcome of each attempt.
  defp connect_missing(instances) do
    current_network = [Node.self() | Node.list()]

    instances
    |> Enum.reject(&(&1 in current_network))
    |> Enum.each(fn node_name ->
      case Node.connect(node_name) do
        true ->
          Logger.info("Connected to handler instance #{node_name}")

        _ ->
          Logger.warning("Could not connect to handler instance #{node_name}")
      end
    end)
  end

  # Asks the cluster process running on `node` for its details and tags the
  # result with a status. The remote side may run another version of this
  # module, so its reply is validated instead of being trusted blindly.
  defp fetch_details(node) do
    case GenServer.call({__MODULE__, node}, :get_details) do
      %{node: _, otp_app: _, version: _, uptime: _, env: _} = details ->
        Map.put(details, :status, :up)

      other ->
        Logger.warning(
          "Unexpected details received from handler instance #{node}: #{inspect(other)}"
        )

        placeholder_details(node, :unknown)
    end
  catch
    :exit, _reason ->
      placeholder_details(node, :down)
  end

  # Details map used when nothing trustworthy is known about `node`.
  defp placeholder_details(node, status) do
    %{
      node: node,
      otp_app: :unknown,
      version: :unknown,
      uptime: :unknown,
      env: :unknown,
      status: status
    }
  end
end

View File

@ -178,7 +178,7 @@ defmodule HAHandler.Control do
process_pgsql()
process_drbd()
{:error, err} ->
Logger.error("Unable to fetch HAProxy state (#{inspect(err)}( - skipping control loop.")
Logger.error("Unable to fetch HAProxy state (#{inspect(err)}) - skipping control loop.")
end
# Schedule next round.

View File

@ -1,7 +1,7 @@
defmodule HAHandler.Web.Controller do
import Plug.Conn
alias HAHandler.{HAProxy, PGSQL, DRBD}
alias HAHandler.{HAProxy, PGSQL, DRBD, Cluster}
@template_dir "lib/ha_handler/web/templates"
@index_template EEx.compile_file(Path.join(@template_dir, "index.html.eex"))
@ -20,11 +20,13 @@ defmodule HAHandler.Web.Controller do
haproxy_stats = HAProxy.get_stats(hide_error: true)
pgsql_stats = PGSQL.get_stats()
drbd_stats = DRBD.get_stats()
handler_stats = Cluster.get_instance_details()
assigns = [
haproxy_stats: haproxy_stats,
pgsql_status: pgsql_stats,
drbd_status: drbd_stats,
handler_status: handler_stats,
hostname: hostname,
otp_app: HAHandler.otp_app(),
version: HAHandler.version(),

View File

@ -25,7 +25,32 @@
<h2>Handler</h2>
<%= otp_app %> <b>v<%= version %></b> (<%= env %>) running on <b><%= hostname %></b>
<p>
<b>Local instance:</b> <%= otp_app %> <b>v<%= version %></b> (<%= env %>) running on <b><%= hostname %></b>
</p>
<table>
<thead>
<tr>
<th>Instance</th>
<th>Version</th>
<th>Env</th>
<th>Status</th>
<th>Uptime</th>
</tr>
</thead>
<tbody>
<%= for instance <- handler_status do %>
<tr>
<td><%= instance.node %></td>
<td><%= instance.version %></td>
<td><%= instance.env %></td>
<td><%= instance.status %></td>
<td><%= instance.uptime %>m</td>
</tr>
<% end %>
</tbody>
</table>
<hr />

View File

@ -4,7 +4,7 @@ defmodule HAHandler.MixProject do
def project do
[
app: :ha_handler,
version: "0.3.0",
version: "0.4.0",
elixir: "~> 1.12",
start_permanent: Mix.env() == :prod,
deps: deps(),