Compare commits

...

8 Commits

Author SHA1 Message Date
Timothée Floure 31fe677c20
Release ha-handler v0.4.2
continuous-integration/drone/push Build encountered an error Details
continuous-integration/drone/tag Build encountered an error Details
continuous-integration/drone Build is passing Details
2022-07-04 12:38:09 +02:00
Timothée Floure b4eb4f524d
Appsignal: ignore backend errors.
continuous-integration/drone/push Build encountered an error Details
2022-07-04 12:36:34 +02:00
Timothée Floure 992ff7f5ef
Fix possible crash on failed DRBD backend, bump development version
continuous-integration/drone/push Build encountered an error Details
2022-07-04 12:27:44 +02:00
Timothée Floure fb3338b4d9
Fix syntax error in PGSQL watcher logic
continuous-integration/drone/push Build is passing Details
2022-06-13 21:08:20 +02:00
Timothée Floure 884796d50c
Release 0.4.1
continuous-integration/drone/tag Build is failing Details
continuous-integration/drone/push Build is failing Details
2022-06-13 18:51:59 +02:00
Timothée Floure 4a2b6a4948
Do not crash due to failed backend components
2022-06-13 18:49:55 +02:00
Timothée Floure aeb6db4f77
Remove secrets of test environment committed by mistake
continuous-integration/drone/push Build is passing Details
They were changed anyway.
2022-06-09 10:15:07 +02:00
Timothée Floure 06b52b3b2a
Pin build image to Alpine 3.15
continuous-integration/drone/push Build is passing Details
2022-06-09 08:54:13 +02:00
8 changed files with 99 additions and 31 deletions

View File

@ -4,7 +4,7 @@ name: default
steps:
- name: build-release
image: alpine:latest
image: alpine:3.15
environment:
MIX_ENV: prod
commands:
@ -17,7 +17,7 @@ steps:
- cd _build/prod/rel
- tar czf "ha-handler-$(git describe --exact-match --tags $(git log -n1 --pretty='%h') || git rev-parse HEAD).tar.gz" ha_handler/
- name: publish-release-archive
image: alpine:latest
image: alpine:3.15
environment:
LFTP_PASSWORD:
from_secret: ssh_password

View File

@ -1,4 +1,14 @@
# 2022-04-20 - v0.4.0
# 2022-07-04 - v0.4.2
* Fix possible crash on failed DRBD backend.
* Appsignal: ignore errors on backends (a failed PSQL backend currently generates
errors and floods Appsignal).
# 2022-06-13 - v0.4.1
* Fix crash on SSHEx / Postgrex connection failure.
# 2022-06-09 - v0.4.0
* Add minimal clustering logic.
* Fix crash on unavailable HAProxy socket.

View File

@ -1,16 +1,20 @@
import Config
config :ha_handler,
http_port: 4000,
http_port: 4040,
acme_challenge_path: "acme-challenge",
haproxy_socket: System.get_env("HAPROXY_SOCKET") || "/var/run/haproxy.sock",
handler_instances: [
:"ha_handler@fenschel",
:"ha_handler2@fenschel"
],
pgsql_instances: [
[
hostname: "pgsql.lnth.ch.recycled.cloud",
username: "postgres",
database: "postgres",
haproxy_server: "lnth",
password: "eicheeR6ieph5jae7oozahf3vesio9Ae",
password: "secret",
socket_options: [:inet6],
ssl: true
],
@ -19,7 +23,7 @@ config :ha_handler,
haproxy_server: "fvil",
username: "postgres",
database: "postgres",
password: "eicheeR6ieph5jae7oozahf3vesio9Ae",
password: "secret",
socket_options: [:inet6],
ssl: true
]
@ -27,12 +31,12 @@ config :ha_handler,
drbd_instances: [
[
hostname: "drbd.lnth.ch.recycled.cloud",
password: "tu9laiz9teece6aithohjohph6eCh3qu",
password: "secret",
haproxy_server: "lnth"
],
[
hostname: "drbd.fvil.ch.recycled.cloud",
password: "tu9laiz9teece6aithohjohph6eCh3qu",
password: "secret",
haproxy_server: "fvil"
]
]
@ -42,4 +46,5 @@ config :appsignal, :config,
otp_app: :ha_handler,
name: "ha-handler",
push_api_key: "secret",
ignore_namespaces: ["pgsql", "drbd"],
env: config_env()

View File

@ -1,4 +1,6 @@
defmodule HAHandler.DRBD do
require Logger
@supervisor HAHandler.DRBD.Supervisor
# There might be >1 resources configured in DRBD!
@ -18,6 +20,15 @@ defmodule HAHandler.DRBD do
@id_extraction_regex ~r/\n\s(?<id>\d+)\:\s/
@data_extraction_regex ~r/cs:(?<cs>(\w|\/)+)\sro:(?<ro>(\w|\/)+)\sds:(?<ds>(\w|\/)+)\s/
# Empty state, when backend is not queryable for some reason.
@empty_state %{
hostname: "unknown",
version: "",
mode: "",
status: "unknown",
data: ""
}
def get_instances() do
watchers = Supervisor.which_children(@supervisor)
@ -32,6 +43,8 @@ defmodule HAHandler.DRBD do
end
def get_state({hostname, pid}) do
empty_reply = %{@empty_state | hostname: hostname}
case GenServer.call(pid, {:execute, @drbd_proc_cmd}) do
{:ok, raw, 0} ->
case Regex.named_captures(@block_regex, raw) do
@ -54,21 +67,25 @@ defmodule HAHandler.DRBD do
|> Enum.filter(fn r -> r["id"] == @default_resource_id end)
|> Enum.at(0)
%{
hostname: hostname,
processed_reply = %{
version: Map.get(version, "full"),
mode: Map.get(default_resource, "ro"),
status: Map.get(default_resource, "ds"),
data: resources
}
Map.merge(empty_reply, processed_reply)
end
_ ->
{:error, "could not parse /proc/drbd"}
Logger.warning("Failed to query DRBD backend: could not parse /proc/drbd.")
end
{:ok, _, posix_err} ->
{:error, posix_err}
{:error, _err} = reply ->
reply
Logger.warning("Failed to query DRBD backend: POSIX #{inspect(posix_err)}.")
empty_reply
{:error, err} ->
Logger.warning("Failed to query DRBD backend: #{inspect(err)}.")
empty_reply
end
end
end

View File

@ -17,7 +17,9 @@ defmodule HAHandler.DRBD.Supervisor do
}
end)
opts = [strategy: :one_for_one]
opts = [
strategy: :one_for_one
]
Supervisor.init(children, opts)
end
end

View File

@ -25,27 +25,52 @@ defmodule HAHandler.DRBD.Watcher do
@impl true
def init(opts) do
hostname = Keyword.get(opts, :hostname)
password = Keyword.get(opts, :password)
{:ok, pid} = connect(hostname, password)
# Configures this worker's jobs to report in the "drbd" namespace
Appsignal.Span.set_namespace(Appsignal.Tracer.root_span(), "drbd")
state = %{
backend: pid,
hostname: hostname,
password: password
backend: nil,
last_reconnect: nil,
hostname: Keyword.get(opts, :hostname),
password: Keyword.get(opts, :password),
}
# This action will be processed once the GenServer is fully
# started/operational. This process handles connection failures by itself,
# as we don't want to crash-loop into the supervisor logic (which is only
# there to handle unexpected failures).
send self(), :reconnect
{:ok, state}
end
@impl true
def handle_info(:reconnect, state = %{hostname: hostname, password: password}) do
case connect(hostname, password) do
{:ok, pid} ->
{:noreply, %{state | backend: pid}}
{:error, _err} ->
# Nothing to do, as the next request will trigger the reconnect logic
# (see :execute call).
{:noreply, state}
end
end
@impl true
def handle_call({:execute, cmd}, _from, %{backend: backend} = state) do
case SSHEx.run(backend, cmd) do
{:ok, _output, _status} = reply->
{:reply, reply, state}
{:error, :closed} = reply ->
# Asynchronously tries to reopen the connection to the backend.
send self(), :reconnect
{:reply, reply, state}
{:error, _err} = reply ->
{:error, reply, state}
# Do not take action on unknown error.
{:reply, reply, state}
end
end
end

View File

@ -8,18 +8,27 @@ defmodule HAHandler.PGSQL.Watcher do
@impl true
def init(opts) do
# Configures this worker's jobs to report in the "pgsql" namespace
Appsignal.Span.set_namespace(Appsignal.Tracer.root_span(), "pgsql")
# Starts a Postgrex child, but this does not mean the connection was
# successful.
# TODO: set dbconnections backoff and connect hooks
# See https://github.com/elixir-ecto/db_connection/blob/master/lib/db_connection.ex#L343
{:ok, pid} = Postgrex.start_link(opts)
case Postgrex.start_link(opts) do
{:ok, pid} ->
state = %{
backend: pid,
hostname: Keyword.get(opts, :hostname)
}
state = %{
backend: pid,
hostname: Keyword.get(opts, :hostname)
}
{:ok, state}
{:ok, state}
{:error, err} ->
# Will be caught by the supervisor if anything happens. It should not
# be triggered even if a PGSQL node is down, since Postgrex has its own
# supervision tree.
{:error, err}
end
end
@impl true

View File

@ -4,7 +4,7 @@ defmodule HAHandler.MixProject do
def project do
[
app: :ha_handler,
version: "0.4.0",
version: "0.4.2",
elixir: "~> 1.12",
start_permanent: Mix.env() == :prod,
deps: deps(),