From b3cf40a01afc5d7e777fab8a1667595b00fb4b20 Mon Sep 17 00:00:00 2001 From: Mikayla Date: Sat, 8 Feb 2025 20:35:04 +0000 Subject: [PATCH 01/11] #589 initial attempt at reboot recovery --- coordinator/coordinator.lua | 12 ++ coordinator/process.lua | 5 + scada-common/comms.lua | 27 ++-- supervisor/facility.lua | 214 +++++++++++++++++++++-------- supervisor/facility_update.lua | 27 +++- supervisor/session/coordinator.lua | 42 +++++- supervisor/session/plc.lua | 1 - supervisor/startup.lua | 3 + supervisor/supervisor.lua | 13 +- supervisor/unit.lua | 6 + 10 files changed, 266 insertions(+), 84 deletions(-) diff --git a/coordinator/coordinator.lua b/coordinator/coordinator.lua index 36163eb..c0ba2f3 100644 --- a/coordinator/coordinator.lua +++ b/coordinator/coordinator.lua @@ -380,6 +380,18 @@ function coordinator.comms(version, nic, sv_watchdog) _send_sv(PROTOCOL.SCADA_MGMT, MGMT_TYPE.CLOSE, {}) end + -- send the resume ready state to the supervisor + ---@param mode PROCESS process control mode + ---@param burn_target number burn rate target + ---@param charge_target number charge level target + ---@param gen_target number generation rate target + ---@param limits number[] unit burn rate limits + function public.send_ready(mode, burn_target, charge_target, gen_target, limits) + _send_sv(PROTOCOL.SCADA_CRDN, CRDN_TYPE.PROCESS_READY, { + mode, burn_target, charge_target, gen_target, limits + }) + end + -- send a facility command ---@param cmd FAC_COMMAND command ---@param option any? 
optional option options for the optional options (like waste mode) diff --git a/coordinator/process.lua b/coordinator/process.lua index 1866686..0fc7c1a 100644 --- a/coordinator/process.lua +++ b/coordinator/process.lua @@ -139,6 +139,11 @@ function process.init(iocontrol, coord_comms) log.info("PROCESS: loaded priority groups settings") end + + -- report to the supervisor all initial configuration data has been sent + -- startup resume can occur if needed + local p = ctl_proc + pctl.comms.send_ready(p.mode, p.burn_target, p.charge_target, p.gen_target, p.limits) end -- create a handle to process control for usage of commands that get acknowledgements diff --git a/scada-common/comms.lua b/scada-common/comms.lua index a8d0014..48cf416 100644 --- a/scada-common/comms.lua +++ b/scada-common/comms.lua @@ -17,7 +17,7 @@ local max_distance = nil local comms = {} -- protocol/data versions (protocol/data independent changes tracked by util.lua version) -comms.version = "3.0.4" +comms.version = "3.0.5" comms.api_version = "0.0.9" ---@enum PROTOCOL @@ -60,18 +60,19 @@ local MGMT_TYPE = { ---@enum CRDN_TYPE local CRDN_TYPE = { INITIAL_BUILDS = 0, -- initial, complete builds packet to the coordinator - FAC_BUILDS = 1, -- facility RTU builds - FAC_STATUS = 2, -- state of facility and facility devices - FAC_CMD = 3, -- faility command - UNIT_BUILDS = 4, -- build of each reactor unit (reactor + RTUs) - UNIT_STATUSES = 5, -- state of each of the reactor units - UNIT_CMD = 6, -- command a reactor unit - API_GET_FAC = 7, -- API: get the facility general data - API_GET_FAC_DTL = 8, -- API: get (detailed) data for the facility app - API_GET_UNIT = 9, -- API: get reactor unit data - API_GET_CTRL = 10, -- API: get data for the control app - API_GET_PROC = 11, -- API: get data for the process app - API_GET_WASTE = 12 -- API: get data for the waste app + PROCESS_READY = 1, -- process init is complete + last set of info for supervisor startup recovery + FAC_BUILDS = 2, -- facility RTU 
builds + FAC_STATUS = 3, -- state of facility and facility devices + FAC_CMD = 4, -- faility command + UNIT_BUILDS = 5, -- build of each reactor unit (reactor + RTUs) + UNIT_STATUSES = 6, -- state of each of the reactor units + UNIT_CMD = 7, -- command a reactor unit + API_GET_FAC = 8, -- API: get the facility general data + API_GET_FAC_DTL = 9, -- API: get (detailed) data for the facility app + API_GET_UNIT = 10, -- API: get reactor unit data + API_GET_CTRL = 11, -- API: get data for the control app + API_GET_PROC = 12, -- API: get data for the process app + API_GET_WASTE = 13 -- API: get data for the waste app } ---@enum ESTABLISH_ACK diff --git a/supervisor/facility.lua b/supervisor/facility.lua index e47842f..6525d84 100644 --- a/supervisor/facility.lua +++ b/supervisor/facility.lua @@ -5,6 +5,7 @@ local util = require("scada-common.util") local unit = require("supervisor.unit") local fac_update = require("supervisor.facility_update") +local plc = require("supervisor.session.plc") local rsctl = require("supervisor.session.rsctl") local svsessions = require("supervisor.session.svsessions") @@ -31,6 +32,17 @@ local START_STATUS = { BLADE_MISMATCH = 2 } +---@enum RECOVERY_STATE +local RCV_STATE = { + INACTIVE = 0, + PRIMED = 1, + RUNNING = 2, + STOPPED = 3 +} + +local CHARGE_SCALER = 1000000 -- convert MFE to FE +local GEN_SCALER = 1000 -- convert kFE to FE + ---@class facility_management local facility = {} @@ -66,12 +78,15 @@ function facility.new(config) -- redstone I/O control io_ctl = nil, ---@type rs_controller -- process control + recovery = RCV_STATE.INACTIVE, ---@type RECOVERY_STATE + recovery_boot_state = nil, ---@type sv_control_state|nil + last_unit_states = nil, ---@type boolean[] units_ready = false, - mode = PROCESS.INACTIVE, - last_mode = PROCESS.INACTIVE, - return_mode = PROCESS.INACTIVE, - mode_set = PROCESS.MAX_BURN, - start_fail = START_STATUS.OK, + mode = PROCESS.INACTIVE, ---@type PROCESS + last_mode = PROCESS.INACTIVE, ---@type PROCESS + 
return_mode = PROCESS.INACTIVE, ---@type PROCESS + mode_set = PROCESS.MAX_BURN, ---@type PROCESS + start_fail = START_STATUS.OK, ---@type START_STATUS max_burn_combined = 0.0, -- maximum burn rate to clamp at burn_target = 0.1, -- burn rate target for aggregate burn mode charge_setpoint = 0, -- FE charge target setpoint @@ -101,8 +116,8 @@ function facility.new(config) last_error = 0.0, last_time = 0.0, -- waste processing - waste_product = WASTE.PLUTONIUM, - current_waste_product = WASTE.PLUTONIUM, + waste_product = WASTE.PLUTONIUM, ---@type WASTE_PRODUCT + current_waste_product = WASTE.PLUTONIUM, ---@type WASTE_PRODUCT pu_fallback = false, sps_low_power = false, disabled_sps = false, @@ -126,14 +141,16 @@ function facility.new(config) imtx_faulted_times = { 0, 0, 0 } } + --#region SETUP + -- provide self to facility update functions local f_update = fac_update(self) -- create units for i = 1, config.UnitCount do - table.insert(self.units, - unit.new(i, self.cooling_conf.r_cool[i].BoilerCount, self.cooling_conf.r_cool[i].TurbineCount, config.ExtChargeIdling)) + table.insert(self.units, unit.new(i, self.cooling_conf.r_cool[i].BoilerCount, self.cooling_conf.r_cool[i].TurbineCount, config.ExtChargeIdling)) table.insert(self.group_map, AUTO_GROUP.MANUAL) + table.insert(self.last_unit_states, false) end -- list for RTU session management @@ -149,6 +166,62 @@ function facility.new(config) table.insert(self.test_tone_states, false) end + --#endregion + + -- PRIVATE FUNCTIONS -- + + ---@param auto_cfg start_auto_config configuration + ---@return boolean ready, number[] unit_limits + local function _auto_check_and_save(auto_cfg) + local ready = false + + -- load up current limits + local limits = {} + for i = 1, config.UnitCount do + limits[i] = self.units[i].get_control_inf().lim_br100 * 100 + end + + -- only allow changes if not running + if self.mode == PROCESS.INACTIVE then + if (type(auto_cfg.mode) == "number") and (auto_cfg.mode > PROCESS.INACTIVE) and (auto_cfg.mode 
<= PROCESS.GEN_RATE) then + self.mode_set = auto_cfg.mode + end + + if (type(auto_cfg.burn_target) == "number") and auto_cfg.burn_target >= 0.1 then + self.burn_target = auto_cfg.burn_target + end + + if (type(auto_cfg.charge_target) == "number") and auto_cfg.charge_target >= 0 then + self.charge_setpoint = auto_cfg.charge_target * CHARGE_SCALER + end + + if (type(auto_cfg.gen_target) == "number") and auto_cfg.gen_target >= 0 then + self.gen_rate_setpoint = auto_cfg.gen_target * GEN_SCALER + end + + if (type(auto_cfg.limits) == "table") and (#auto_cfg.limits == config.UnitCount) then + for i = 1, config.UnitCount do + local limit = auto_cfg.limits[i] + + if (type(limit) == "number") and (limit >= 0.1) then + limits[i] = limit + self.units[i].set_burn_limit(limit) + end + end + end + + ready = self.mode_set > 0 + + if ((self.mode_set == PROCESS.CHARGE) and (self.charge_setpoint <= 0)) or + ((self.mode_set == PROCESS.GEN_RATE) and (self.gen_rate_setpoint <= 0)) or + ((self.mode_set == PROCESS.BURN_RATE) and (self.burn_target < 0.1)) then + ready = false + end + end + + return ready, limits + end + -- PUBLIC FUNCTIONS -- ---@class facility @@ -239,6 +312,42 @@ function facility.new(config) -- update (iterate) the facility management function public.update() + -- attempt reboot recovery if in progress + if self.recovery == RCV_STATE.RUNNING then + -- try to start auto control + if self.recovery_boot_state.mode ~= nil and self.units_ready then + self.recovery_boot_state.mode = nil + self.mode = self.mode_set + log.info("FAC: process startup resume initiated") + end + + local recovered = self.recovery_boot_state.mode == nil + + -- restore manual control reactors + for i = 1, #self.units do + if self.recovery_boot_state.unit_states[i] and self.group_map[i] == AUTO_GROUP.MANUAL then + recovered = false + + if self.units[i].get_control_inf().ready then + local plc_s = svsessions.get_reactor_session(i) + if plc_s ~= nil then + 
plc_s.in_queue.push_command(plc.PLC_S_CMDS.ENABLE) + log.info("FAC: startup resume enabling manually controlled reactor unit #" .. i) + + -- only execute once + self.recovery_boot_state.unit_states[i] = nil + end + end + end + end + + if recovered then + self.recovery = RCV_STATE.STOPPED + self.recovery_boot_state = nil + log.info("FAC: startup resume complete") + end + end + -- run process control and evaluate automatic SCRAM f_update.pre_auto() f_update.auto_control(config.ExtChargeIdling) @@ -267,6 +376,35 @@ function facility.new(config) --#endregion + --#region Startup Recovery + + ---@param state sv_control_state + function public.startup_recovery_init(state) + if self.recovery == RCV_STATE.INACTIVE then + self.recovery_boot_state = state + self.recovery = RCV_STATE.PRIMED + end + end + + -- attempt startup recovery + ---@param auto_cfg start_auto_config configuration + function public.startup_recovery_start(auto_cfg) + if self.recovery == RCV_STATE.PRIMED and self.recovery_boot_state and + self.recovery_boot_state.mode ~= PROCESS.INACTIVE and self.recovery_boot_state.mode ~= PROCESS.SYSTEM_ALARM_IDLE then + self.recovery = util.trinary(_auto_check_and_save(auto_cfg), RCV_STATE.RUNNING, RCV_STATE.STOPPED) + log.info(util.c("FAC: startup resume ", util.trinary(self.recovery == RCV_STATE.RUNNING, "ready", "failed"))) + else self.recovery = RCV_STATE.STOPPED end + end + + -- used on certain coordinator commands to end reboot recovery (remain in current operational state) + function public.cancel_recovery() + self.recovery = RCV_STATE.STOPPED + self.recovery_boot_state = nil + log.info("FAC: process startup resume cancelled by user operation") + end + + --#endregion + --#region Commands -- SCRAM all reactor units @@ -290,59 +428,13 @@ function facility.new(config) function public.auto_stop() self.mode = PROCESS.INACTIVE end -- set automatic control configuration and start the process - ---@param auto_cfg sys_auto_config configuration + ---@param auto_cfg 
start_auto_config configuration ---@return table response ready state (successfully started) and current configuration (after updating) function public.auto_start(auto_cfg) - local charge_scaler = 1000000 -- convert MFE to FE - local gen_scaler = 1000 -- convert kFE to FE - local ready = false + local ready, limits = _auto_check_and_save(auto_cfg) - -- load up current limits - local limits = {} - for i = 1, config.UnitCount do - limits[i] = self.units[i].get_control_inf().lim_br100 * 100 - end - - -- only allow changes if not running - if self.mode == PROCESS.INACTIVE then - if (type(auto_cfg.mode) == "number") and (auto_cfg.mode > PROCESS.INACTIVE) and (auto_cfg.mode <= PROCESS.GEN_RATE) then - self.mode_set = auto_cfg.mode - end - - if (type(auto_cfg.burn_target) == "number") and auto_cfg.burn_target >= 0.1 then - self.burn_target = auto_cfg.burn_target - end - - if (type(auto_cfg.charge_target) == "number") and auto_cfg.charge_target >= 0 then - self.charge_setpoint = auto_cfg.charge_target * charge_scaler - end - - if (type(auto_cfg.gen_target) == "number") and auto_cfg.gen_target >= 0 then - self.gen_rate_setpoint = auto_cfg.gen_target * gen_scaler - end - - if (type(auto_cfg.limits) == "table") and (#auto_cfg.limits == config.UnitCount) then - for i = 1, config.UnitCount do - local limit = auto_cfg.limits[i] - - if (type(limit) == "number") and (limit >= 0.1) then - limits[i] = limit - self.units[i].set_burn_limit(limit) - end - end - end - - ready = self.mode_set > 0 - - if ((self.mode_set == PROCESS.CHARGE) and (self.charge_setpoint <= 0)) or - ((self.mode_set == PROCESS.GEN_RATE) and (self.gen_rate_setpoint <= 0)) or - ((self.mode_set == PROCESS.BURN_RATE) and (self.burn_target < 0.1)) then - ready = false - end - - ready = ready and self.units_ready - - if ready then self.mode = self.mode_set end + if ready and self.units_ready then + self.mode = self.mode_set end log.debug(util.c("FAC: process start ", util.trinary(ready, "accepted", "rejected"))) @@ 
-351,8 +443,8 @@ function facility.new(config) ready, self.mode_set, self.burn_target, - self.charge_setpoint / charge_scaler, - self.gen_rate_setpoint / gen_scaler, + self.charge_setpoint / CHARGE_SCALER, + self.gen_rate_setpoint / GEN_SCALER, limits } end diff --git a/supervisor/facility_update.lua b/supervisor/facility_update.lua index 5e6fa07..9526596 100644 --- a/supervisor/facility_update.lua +++ b/supervisor/facility_update.lua @@ -650,8 +650,16 @@ function update.auto_safety() end end --- update last mode and set next mode +-- update last mode, set next mode, and update saved state as needed function update.post_auto() + if self.mode ~= next_mode then + settings.set("LastProcessState", next_mode) + local saved = settings.save("/supervisor.settings") + if not saved then + log.warning("facility_update.post_auto(): failed to save supervisor settings file") + end + end + self.last_mode = self.mode self.mode = next_mode end @@ -792,6 +800,7 @@ end function update.unit_mgmt() local insufficent_po_rate = false local need_emcool = false + local write_state = false for i = 1, #self.units do local u = self.units[i] @@ -807,6 +816,22 @@ function update.unit_mgmt() if (self.cooling_conf.fac_tank_mode > 0) and u.is_emer_cool_tripped() and (self.cooling_conf.fac_tank_defs[i] == 2) then need_emcool = true end + + -- check for control state changes to save + if self.last_unit_states[i] ~= u.get_control_state() then + self.last_unit_states[i] = u.get_control_state() + write_state = true + end + end + + -- record unit control states + + if write_state then + settings.set("LastUnitStates", self.last_unit_states) + local saved = settings.save("/supervisor.settings") + if not saved then + log.warning("facility_update.unit_mgmt(): failed to save supervisor settings file") + end end -- update waste product diff --git a/supervisor/session/coordinator.lua b/supervisor/session/coordinator.lua index b011d44..a606ada 100644 --- a/supervisor/session/coordinator.lua +++ 
b/supervisor/session/coordinator.lua @@ -234,6 +234,23 @@ function coordinator.new_session(id, s_addr, i_seq_num, in_queue, out_queue, tim if pkt.type == CRDN_TYPE.INITIAL_BUILDS then -- acknowledgement to coordinator receiving builds self.acks.builds = true + elseif pkt.type == CRDN_TYPE.PROCESS_READY then + if pkt.length == 5 then + -- coordinator has sent all initial process data, power-on recovery is now possible + + ---@type start_auto_config + local config = { + mode = pkt.data[1], ---@type PROCESS + burn_target = pkt.data[2], ---@type number + charge_target = pkt.data[3], ---@type number + gen_target = pkt.data[4], ---@type number + limits = pkt.data[5] ---@type number[] + } + + facility.startup_recovery_start(config) + else + log.debug(log_tag .. "CRDN process ready packet length mismatch") + end elseif pkt.type == CRDN_TYPE.FAC_BUILDS then -- acknowledgement to coordinator receiving builds self.acks.fac_builds = true @@ -243,8 +260,11 @@ function coordinator.new_session(id, s_addr, i_seq_num, in_queue, out_queue, tim if cmd == FAC_COMMAND.SCRAM_ALL then facility.scram_all() + facility.cancel_recovery() _send(CRDN_TYPE.FAC_CMD, { cmd, true }) elseif cmd == FAC_COMMAND.STOP then + facility.cancel_recovery() + local was_active = facility.auto_is_active() if was_active then @@ -253,15 +273,16 @@ function coordinator.new_session(id, s_addr, i_seq_num, in_queue, out_queue, tim _send(CRDN_TYPE.FAC_CMD, { cmd, was_active }) elseif cmd == FAC_COMMAND.START then + facility.cancel_recovery() + if pkt.length == 6 then - ---@type sys_auto_config ----@diagnostic disable-next-line: missing-fields + ---@class start_auto_config local config = { - mode = pkt.data[2], - burn_target = pkt.data[3], - charge_target = pkt.data[4], - gen_target = pkt.data[5], - limits = pkt.data[6] + mode = pkt.data[2], ---@type PROCESS + burn_target = pkt.data[3], ---@type number + charge_target = pkt.data[4], ---@type number + gen_target = pkt.data[5], ---@type number + limits = pkt.data[6] 
---@type number[] } _send(CRDN_TYPE.FAC_CMD, { cmd, table.unpack(facility.auto_start(config)) }) @@ -313,8 +334,11 @@ function coordinator.new_session(id, s_addr, i_seq_num, in_queue, out_queue, tim local manual = facility.get_group(uid) == AUTO_GROUP.MANUAL if cmd == UNIT_COMMAND.SCRAM then + facility.cancel_recovery() out_queue.push_data(SV_Q_DATA.SCRAM, data) elseif cmd == UNIT_COMMAND.START then + facility.cancel_recovery() + if manual then out_queue.push_data(SV_Q_DATA.START, data) else @@ -324,6 +348,8 @@ function coordinator.new_session(id, s_addr, i_seq_num, in_queue, out_queue, tim elseif cmd == UNIT_COMMAND.RESET_RPS then out_queue.push_data(SV_Q_DATA.RESET_RPS, data) elseif cmd == UNIT_COMMAND.SET_BURN then + facility.cancel_recovery() + if pkt.length == 3 then if manual then out_queue.push_data(SV_Q_DATA.SET_BURN, data) @@ -354,6 +380,8 @@ function coordinator.new_session(id, s_addr, i_seq_num, in_queue, out_queue, tim log.debug(log_tag .. "CRDN unit command reset alarm missing alarm id") end elseif cmd == UNIT_COMMAND.SET_GROUP then + facility.cancel_recovery() + if (pkt.length == 3) and (type(pkt.data[3]) == "number") and (pkt.data[3] >= AUTO_GROUP.MANUAL) and (pkt.data[3] <= AUTO_GROUP.BACKUP) then facility.set_group(unit.get_id(), pkt.data[3]) diff --git a/supervisor/session/plc.lua b/supervisor/session/plc.lua index bedbd8b..0217deb 100644 --- a/supervisor/session/plc.lua +++ b/supervisor/session/plc.lua @@ -61,7 +61,6 @@ function plc.new_session(id, s_addr, i_seq_num, reactor_id, in_queue, out_queue, local log_tag = "plc_session(" .. id .. 
"): " local self = { - commanded_state = false, commanded_burn_rate = 0.0, auto_cmd_token = 0, ramping_rate = false, diff --git a/supervisor/startup.lua b/supervisor/startup.lua index 32651da..c2686e0 100644 --- a/supervisor/startup.lua +++ b/supervisor/startup.lua @@ -147,6 +147,9 @@ local function main() -- halve the rate heartbeat LED flash local heartbeat_toggle = true + -- init startup recovery + sv_facility.startup_recovery_init(supervisor.boot_state) + -- event loop while true do local event, param1, param2, param3, param4, param5 = util.pull_event() diff --git a/supervisor/supervisor.lua b/supervisor/supervisor.lua index fe7b011..4df3bc5 100644 --- a/supervisor/supervisor.lua +++ b/supervisor/supervisor.lua @@ -19,10 +19,21 @@ local config = {} supervisor.config = config --- load the supervisor configuration +-- load the supervisor configuration and startup state function supervisor.load_config() if not settings.load("/supervisor.settings") then return false end + ---@class sv_control_state + local boot_state = { + mode = settings.get("LastProcessState"), ---@type PROCESS + unit_states = settings.get("LastUnitStates") ---@type boolean[] + } + + -- only record boot state if likely valid + if type(boot_state.mode) == "number" and type(boot_state.unit_states) == "table" then + supervisor.boot_state = boot_state + end + config.UnitCount = settings.get("UnitCount") config.CoolingConfig = settings.get("CoolingConfig") config.FacilityTankMode = settings.get("FacilityTankMode") diff --git a/supervisor/unit.lua b/supervisor/unit.lua index dc59cff..8bbac07 100644 --- a/supervisor/unit.lua +++ b/supervisor/unit.lua @@ -917,6 +917,12 @@ function unit.new(reactor_id, num_boilers, num_turbines, ext_idle) return status end + -- check the commanded control state of the reactor (if connected) + ---@nodiscard + function public.get_control_state() + if self.plc_i ~= nil then return self.plc_i.get_db().control_state else return false end + end + -- get the current burn rate 
(actual rate) ---@nodiscard function public.get_burn_rate() From f32855084e972c82408ec60a863fa1f55e8bc7f2 Mon Sep 17 00:00:00 2001 From: Mikayla Fischler Date: Sat, 8 Feb 2025 22:20:00 -0500 Subject: [PATCH 02/11] #589 WIP reboot recovery --- supervisor/facility.lua | 52 ++++++++++++++++++++++++++-------- supervisor/facility_update.lua | 9 +++--- supervisor/startup.lua | 4 ++- supervisor/unit.lua | 12 ++++---- 4 files changed, 53 insertions(+), 24 deletions(-) diff --git a/supervisor/facility.lua b/supervisor/facility.lua index 6525d84..354ceb5 100644 --- a/supervisor/facility.lua +++ b/supervisor/facility.lua @@ -80,7 +80,7 @@ function facility.new(config) -- process control recovery = RCV_STATE.INACTIVE, ---@type RECOVERY_STATE recovery_boot_state = nil, ---@type sv_control_state|nil - last_unit_states = nil, ---@type boolean[] + last_unit_states = {}, ---@type boolean[] units_ready = false, mode = PROCESS.INACTIVE, ---@type PROCESS last_mode = PROCESS.INACTIVE, ---@type PROCESS @@ -166,6 +166,13 @@ function facility.new(config) table.insert(self.test_tone_states, false) end + -- init next boot state + settings.set("LastProcessState", PROCESS.INACTIVE) + settings.set("LastUnitStates", self.last_unit_states) + if not settings.save("/supervisor.settings") then + log.warning("FAC: failed to save initial control state into supervisor settings file") + end + --#endregion -- PRIVATE FUNCTIONS -- @@ -316,19 +323,24 @@ function facility.new(config) if self.recovery == RCV_STATE.RUNNING then -- try to start auto control if self.recovery_boot_state.mode ~= nil and self.units_ready then + if self.recovery_boot_state.mode ~= PROCESS.INACTIVE and self.recovery_boot_state.mode ~= PROCESS.SYSTEM_ALARM_IDLE then + self.mode = self.mode_set + log.info("FAC: process startup resume initiated") + end + self.recovery_boot_state.mode = nil - self.mode = self.mode_set - log.info("FAC: process startup resume initiated") end - local recovered = self.recovery_boot_state.mode == nil + 
local recovered = self.recovery_boot_state.mode == nil or self.recovery_boot_state.mode == PROCESS.INACTIVE -- restore manual control reactors for i = 1, #self.units do + local u = self.units[i] + if self.recovery_boot_state.unit_states[i] and self.group_map[i] == AUTO_GROUP.MANUAL then recovered = false - if self.units[i].get_control_inf().ready then + if u.get_control_inf().ready then local plc_s = svsessions.get_reactor_session(i) if plc_s ~= nil then plc_s.in_queue.push_command(plc.PLC_S_CMDS.ENABLE) @@ -344,7 +356,7 @@ function facility.new(config) if recovered then self.recovery = RCV_STATE.STOPPED self.recovery_boot_state = nil - log.info("FAC: startup resume complete") + log.info("FAC: startup resume sequence completed") end end @@ -378,29 +390,45 @@ function facility.new(config) --#region Startup Recovery + -- on exit, use this to clear the boot state so we don't resume when exiting cleanly + function public.clear_boot_state() + settings.unset("LastProcessState") + settings.unset("LastUnitStates") + + local saved = settings.save("/supervisor.settings") + if not saved then + log.warning("facility.clear_boot_state(): failed to save supervisor settings file") + else + log.debug("FAC: cleared boot state on exit") + end + end + + -- initialize startup recovery ---@param state sv_control_state function public.startup_recovery_init(state) if self.recovery == RCV_STATE.INACTIVE then self.recovery_boot_state = state self.recovery = RCV_STATE.PRIMED + log.info("FAC: startup resume ready") end end -- attempt startup recovery ---@param auto_cfg start_auto_config configuration function public.startup_recovery_start(auto_cfg) - if self.recovery == RCV_STATE.PRIMED and self.recovery_boot_state and - self.recovery_boot_state.mode ~= PROCESS.INACTIVE and self.recovery_boot_state.mode ~= PROCESS.SYSTEM_ALARM_IDLE then + if self.recovery == RCV_STATE.PRIMED then self.recovery = util.trinary(_auto_check_and_save(auto_cfg), RCV_STATE.RUNNING, RCV_STATE.STOPPED) - 
log.info(util.c("FAC: startup resume ", util.trinary(self.recovery == RCV_STATE.RUNNING, "ready", "failed"))) + log.info(util.c("FAC: startup resume ", util.trinary(self.recovery == RCV_STATE.RUNNING, "started", "failed"))) else self.recovery = RCV_STATE.STOPPED end end -- used on certain coordinator commands to end reboot recovery (remain in current operational state) function public.cancel_recovery() - self.recovery = RCV_STATE.STOPPED - self.recovery_boot_state = nil - log.info("FAC: process startup resume cancelled by user operation") + if self.recovery == RCV_STATE.RUNNING then + self.recovery = RCV_STATE.STOPPED + self.recovery_boot_state = nil + log.info("FAC: process startup resume cancelled by user operation") + end end --#endregion diff --git a/supervisor/facility_update.lua b/supervisor/facility_update.lua index 9526596..9b5f6fd 100644 --- a/supervisor/facility_update.lua +++ b/supervisor/facility_update.lua @@ -817,9 +817,9 @@ function update.unit_mgmt() need_emcool = true end - -- check for control state changes to save - if self.last_unit_states[i] ~= u.get_control_state() then - self.last_unit_states[i] = u.get_control_state() + -- check for enabled state changes to save + if self.last_unit_states[i] ~= u.is_reactor_enabled() then + self.last_unit_states[i] = u.is_reactor_enabled() write_state = true end end @@ -828,8 +828,7 @@ function update.unit_mgmt() if write_state then settings.set("LastUnitStates", self.last_unit_states) - local saved = settings.save("/supervisor.settings") - if not saved then + if not settings.save("/supervisor.settings") then log.warning("facility_update.unit_mgmt(): failed to save supervisor settings file") end end diff --git a/supervisor/startup.lua b/supervisor/startup.lua index c2686e0..0b5c1e6 100644 --- a/supervisor/startup.lua +++ b/supervisor/startup.lua @@ -22,7 +22,7 @@ local supervisor = require("supervisor.supervisor") local svsessions = require("supervisor.session.svsessions") -local SUPERVISOR_VERSION = 
"v1.6.2" +local SUPERVISOR_VERSION = "v1.6.3" local println = util.println local println_ts = util.println_ts @@ -240,6 +240,8 @@ local function main() end end + sv_facility.clear_boot_state() + renderer.close_ui() util.println_ts("exited") diff --git a/supervisor/unit.lua b/supervisor/unit.lua index 8bbac07..01d0d3e 100644 --- a/supervisor/unit.lua +++ b/supervisor/unit.lua @@ -840,6 +840,12 @@ function unit.new(reactor_id, num_boilers, num_turbines, ext_idle) return false end + -- check the active state of the reactor (if connected) + ---@nodiscard + function public.is_reactor_enabled() + if self.plc_i ~= nil then return self.plc_i.get_status().status else return false end + end + -- check if the reactor is connected, is stopped, the RPS is not tripped, and no alarms are active ---@nodiscard function public.is_safe_idle() @@ -917,12 +923,6 @@ function unit.new(reactor_id, num_boilers, num_turbines, ext_idle) return status end - -- check the commanded control state of the reactor (if connected) - ---@nodiscard - function public.get_control_state() - if self.plc_i ~= nil then return self.plc_i.get_db().control_state else return false end - end - -- get the current burn rate (actual rate) ---@nodiscard function public.get_burn_rate() From 4e31b33b092d2ade970161302b7a2d3a00756ca3 Mon Sep 17 00:00:00 2001 From: Mikayla Fischler Date: Sun, 9 Feb 2025 11:59:03 -0500 Subject: [PATCH 03/11] #601 reset RPS if the triggering condition is a timeout on PLC session establish --- reactor-plc/plc.lua | 3 +-- supervisor/session/plc.lua | 19 +++++++++++++++++-- 2 files changed, 18 insertions(+), 4 deletions(-) diff --git a/reactor-plc/plc.lua b/reactor-plc/plc.lua index a84e55a..73b95ce 100644 --- a/reactor-plc/plc.lua +++ b/reactor-plc/plc.lua @@ -23,8 +23,7 @@ local AUTO_ACK = comms.PLC_AUTO_ACK local RPS_LIMITS = const.RPS_LIMITS --- I sure hope the devs don't change this error message, not that it would have safety implications --- I wish they didn't change it to be like this 
+-- specific errors thrown when scram/start is used that still count as success local PCALL_SCRAM_MSG = "Scram requires the reactor to be active." local PCALL_START_MSG = "Reactor is already active." diff --git a/supervisor/session/plc.lua b/supervisor/session/plc.lua index 0217deb..3ef4134 100644 --- a/supervisor/session/plc.lua +++ b/supervisor/session/plc.lua @@ -61,6 +61,7 @@ function plc.new_session(id, s_addr, i_seq_num, reactor_id, in_queue, out_queue, local log_tag = "plc_session(" .. id .. "): " local self = { + initial_reset = true, commanded_burn_rate = 0.0, auto_cmd_token = 0, ramping_rate = false, @@ -379,7 +380,14 @@ function plc.new_session(id, s_addr, i_seq_num, reactor_id, in_queue, out_queue, if pkt.length == 14 then local status = pcall(_copy_rps_status, pkt.data) if status then - -- copied in RPS status data OK + -- copied in RPS status data OK, try initial reset if applicable + if self.initial_reset then + self.initial_reset = false + if self.sDB.rps_trip_cause == "timeout" then + _send(RPLC_TYPE.RPS_AUTO_RESET, {}) + log.debug(log_tag .. "initial RPS reset on timeout status sent") + end + end else -- error copying RPS status data log.error(log_tag .. "failed to parse RPS status packet data") @@ -392,7 +400,14 @@ function plc.new_session(id, s_addr, i_seq_num, reactor_id, in_queue, out_queue, if pkt.length == 13 then local status = pcall(_copy_rps_status, { true, table.unpack(pkt.data) }) if status then - -- copied in RPS status data OK + -- copied in RPS status data OK, try initial reset if applicable + if self.initial_reset then + self.initial_reset = false + if self.sDB.rps_trip_cause == "timeout" then + _send(RPLC_TYPE.RPS_AUTO_RESET, {}) + log.debug(log_tag .. "initial RPS reset on timeout alarm sent") + end + end else -- error copying RPS status data log.error(log_tag .. 
"failed to parse RPS alarm status data") From cab3427c7044c383f08e4bd146f909dc4776df5a Mon Sep 17 00:00:00 2001 From: Mikayla Fischler Date: Sun, 9 Feb 2025 12:10:13 -0500 Subject: [PATCH 04/11] #601 only reset on timeout once per unit per supervisor boot --- supervisor/session/plc.lua | 12 ++++++------ supervisor/session/svsessions.lua | 5 ++++- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/supervisor/session/plc.lua b/supervisor/session/plc.lua index 3ef4134..68a8791 100644 --- a/supervisor/session/plc.lua +++ b/supervisor/session/plc.lua @@ -53,15 +53,15 @@ local PERIODICS = { ---@param in_queue mqueue in message queue ---@param out_queue mqueue out message queue ---@param timeout number communications timeout +---@param initial_reset boolean[] initial PLC reset on timeout flags, indexed by reactor_id ---@param fp_ok boolean if the front panel UI is running -function plc.new_session(id, s_addr, i_seq_num, reactor_id, in_queue, out_queue, timeout, fp_ok) +function plc.new_session(id, s_addr, i_seq_num, reactor_id, in_queue, out_queue, timeout, initial_reset, fp_ok) -- print a log message to the terminal as long as the UI isn't running local function println(message) if not fp_ok then util.println_ts(message) end end local log_tag = "plc_session(" .. id .. "): " local self = { - initial_reset = true, commanded_burn_rate = 0.0, auto_cmd_token = 0, ramping_rate = false, @@ -381,8 +381,8 @@ function plc.new_session(id, s_addr, i_seq_num, reactor_id, in_queue, out_queue, local status = pcall(_copy_rps_status, pkt.data) if status then -- copied in RPS status data OK, try initial reset if applicable - if self.initial_reset then - self.initial_reset = false + if initial_reset[reactor_id] then + initial_reset[reactor_id] = false if self.sDB.rps_trip_cause == "timeout" then _send(RPLC_TYPE.RPS_AUTO_RESET, {}) log.debug(log_tag .. 
"initial RPS reset on timeout status sent") @@ -401,8 +401,8 @@ function plc.new_session(id, s_addr, i_seq_num, reactor_id, in_queue, out_queue, local status = pcall(_copy_rps_status, { true, table.unpack(pkt.data) }) if status then -- copied in RPS status data OK, try initial reset if applicable - if self.initial_reset then - self.initial_reset = false + if initial_reset[reactor_id] then + initial_reset[reactor_id] = false if self.sDB.rps_trip_cause == "timeout" then _send(RPLC_TYPE.RPS_AUTO_RESET, {}) log.debug(log_tag .. "initial RPS reset on timeout alarm sent") diff --git a/supervisor/session/svsessions.lua b/supervisor/session/svsessions.lua index ac216d6..59d8e70 100644 --- a/supervisor/session/svsessions.lua +++ b/supervisor/session/svsessions.lua @@ -45,6 +45,7 @@ local self = { fp_ok = false, config = nil, ---@type svr_config facility = nil, ---@type facility|nil + plc_ini_reset = {}, -- lists of connected sessions ---@diagnostic disable: missing-fields sessions = { @@ -392,6 +393,8 @@ function svsessions.init(nic, fp_ok, config, facility) end self.dev_dbg.connected.units[i] = conns + + self.plc_ini_reset[i] = true end end @@ -486,7 +489,7 @@ function svsessions.establish_plc_session(source_addr, i_seq_num, for_reactor, v local id = self.next_ids.plc - plc_s.instance = plc.new_session(id, source_addr, i_seq_num, for_reactor, plc_s.in_queue, plc_s.out_queue, self.config.PLC_Timeout, self.fp_ok) + plc_s.instance = plc.new_session(id, source_addr, i_seq_num, for_reactor, plc_s.in_queue, plc_s.out_queue, self.config.PLC_Timeout, self.plc_ini_reset, self.fp_ok) table.insert(self.sessions.plc, plc_s) local units = self.facility.get_units() From 40cb9f599a2fd85edb497eec9b88fb68bf5da904 Mon Sep 17 00:00:00 2001 From: Mikayla Fischler Date: Sun, 9 Feb 2025 13:06:44 -0500 Subject: [PATCH 05/11] #602 only auto reset units that should be --- supervisor/facility_update.lua | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git 
a/supervisor/facility_update.lua b/supervisor/facility_update.lua index 9b5f6fd..a74d3f2 100644 --- a/supervisor/facility_update.lua +++ b/supervisor/facility_update.lua @@ -642,9 +642,10 @@ function update.auto_safety() self.ascram_reason = AUTO_SCRAM.NONE -- reset PLC RPS trips if we should - for i = 1, #self.units do - local u = self.units[i] - u.auto_cond_rps_reset() + for i = 1, #self.prio_defs do + for _, u in pairs(self.prio_defs[i]) do + u.auto_cond_rps_reset() + end end end end From 556331f75b6bbbef7ffc3d9bc8bf14ef90fa2255 Mon Sep 17 00:00:00 2001 From: Mikayla Fischler Date: Sun, 9 Feb 2025 13:07:01 -0500 Subject: [PATCH 06/11] better unit ready check --- supervisor/session/plc.lua | 15 +++++++++++++-- supervisor/unitlogic.lua | 9 ++++----- 2 files changed, 17 insertions(+), 7 deletions(-) diff --git a/supervisor/session/plc.lua b/supervisor/session/plc.lua index 68a8791..4eea46d 100644 --- a/supervisor/session/plc.lua +++ b/supervisor/session/plc.lua @@ -72,6 +72,7 @@ function plc.new_session(id, s_addr, i_seq_num, reactor_id, in_queue, out_queue, connected = true, received_struct = false, received_status_cache = false, + received_rps_status = false, conn_watchdog = util.new_watchdog(timeout), last_rtt = 0, -- periodic messages @@ -380,7 +381,10 @@ function plc.new_session(id, s_addr, i_seq_num, reactor_id, in_queue, out_queue, if pkt.length == 14 then local status = pcall(_copy_rps_status, pkt.data) if status then - -- copied in RPS status data OK, try initial reset if applicable + -- copied in RPS status data OK + self.received_rps_status = true + + -- try initial reset if needed if initial_reset[reactor_id] then initial_reset[reactor_id] = false if self.sDB.rps_trip_cause == "timeout" then @@ -400,7 +404,10 @@ function plc.new_session(id, s_addr, i_seq_num, reactor_id, in_queue, out_queue, if pkt.length == 13 then local status = pcall(_copy_rps_status, { true, table.unpack(pkt.data) }) if status then - -- copied in RPS status data OK, try initial 
reset if applicable + -- copied in RPS status data OK + self.received_rps_status = true + + -- try initial reset if needed if initial_reset[reactor_id] then initial_reset[reactor_id] = false if self.sDB.rps_trip_cause == "timeout" then @@ -501,6 +508,10 @@ function plc.new_session(id, s_addr, i_seq_num, reactor_id, in_queue, out_queue, ---@nodiscard function public.get_db() return self.sDB end + -- check if the reactor structure, status, and RPS status have been received + ---@nodiscard + function public.check_received_all_data() return self.received_struct and self.received_status_cache and self.received_rps_status end + -- check if ramping is completed by first verifying auto command token ack ---@nodiscard function public.is_ramp_complete() diff --git a/supervisor/unitlogic.lua b/supervisor/unitlogic.lua index f0e4c33..8364d20 100644 --- a/supervisor/unitlogic.lua +++ b/supervisor/unitlogic.lua @@ -67,11 +67,10 @@ function logic.update_annunciator(self) local plc_db = self.plc_i.get_db() -- update ready state - -- - can't be tripped - -- - must have received status at least once - -- - must have received struct at least once - plc_ready = plc_db.formed and (not plc_db.no_reactor) and (not plc_db.rps_tripped) and - (next(self.plc_i.get_status()) ~= nil) and (next(self.plc_i.get_struct()) ~= nil) + -- - must be connected to a formed reactor + -- - can't have a tripped RPS + -- - must have received status, struct, and RPS status at least once + plc_ready = plc_db.formed and (not plc_db.no_reactor) and (not plc_db.rps_tripped) and self.plc_i.check_received_all_data() -- update auto control limit if (plc_db.mek_struct.max_burn > 0) and ((self.db.control.lim_br100 / 100) > plc_db.mek_struct.max_burn) then From 22cdbc8638dfbb54346031ca18add737d79d3dab Mon Sep 17 00:00:00 2001 From: Mikayla Fischler Date: Sun, 9 Feb 2025 13:07:36 -0500 Subject: [PATCH 07/11] #589 supervisor control reboot recovery --- supervisor/facility.lua | 7 +++---- supervisor/facility_update.lua | 
13 +++++-------- 2 files changed, 8 insertions(+), 12 deletions(-) diff --git a/supervisor/facility.lua b/supervisor/facility.lua index 354ceb5..70b7ad0 100644 --- a/supervisor/facility.lua +++ b/supervisor/facility.lua @@ -395,8 +395,7 @@ function facility.new(config) settings.unset("LastProcessState") settings.unset("LastUnitStates") - local saved = settings.save("/supervisor.settings") - if not saved then + if not settings.save("/supervisor.settings") then log.warning("facility.clear_boot_state(): failed to save supervisor settings file") else log.debug("FAC: cleared boot state on exit") @@ -404,9 +403,9 @@ function facility.new(config) end -- initialize startup recovery - ---@param state sv_control_state + ---@param state sv_control_state|nil function public.startup_recovery_init(state) - if self.recovery == RCV_STATE.INACTIVE then + if self.recovery == RCV_STATE.INACTIVE and state then self.recovery_boot_state = state self.recovery = RCV_STATE.PRIMED log.info("FAC: startup resume ready") diff --git a/supervisor/facility_update.lua b/supervisor/facility_update.lua index a74d3f2..f951661 100644 --- a/supervisor/facility_update.lua +++ b/supervisor/facility_update.lua @@ -243,6 +243,11 @@ function update.auto_control(ExtChargeIdling) log.debug(util.c("FAC: state changed from ", PROCESS_NAMES[self.last_mode + 1], " to ", PROCESS_NAMES[self.mode + 1])) + settings.set("LastProcessState", self.mode) + if not settings.save("/supervisor.settings") then + log.warning("facility_update.auto_control(): failed to save supervisor settings file") + end + if (self.last_mode == PROCESS.INACTIVE) or (self.last_mode == PROCESS.GEN_RATE_FAULT_IDLE) then self.start_fail = START_STATUS.OK @@ -653,14 +658,6 @@ end -- update last mode, set next mode, and update saved state as needed function update.post_auto() - if self.mode ~= next_mode then - settings.set("LastProcessState", next_mode) - local saved = settings.save("/supervisor.settings") - if not saved then - 
log.warning("facility_update.post_auto(): failed to save supervisor settings file") - end - end - self.last_mode = self.mode self.mode = next_mode end From 54167e21136c40df2f2f63ed21bcbd0a19276b65 Mon Sep 17 00:00:00 2001 From: Mikayla Fischler Date: Sun, 9 Feb 2025 13:13:18 -0500 Subject: [PATCH 08/11] #589 only scram reactor on plc boot if networked --- reactor-plc/startup.lua | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/reactor-plc/startup.lua b/reactor-plc/startup.lua index 551e6a3..0ba81d8 100644 --- a/reactor-plc/startup.lua +++ b/reactor-plc/startup.lua @@ -18,7 +18,7 @@ local plc = require("reactor-plc.plc") local renderer = require("reactor-plc.renderer") local threads = require("reactor-plc.threads") -local R_PLC_VERSION = "v1.8.14" +local R_PLC_VERSION = "v1.8.15" local println = util.println local println_ts = util.println_ts @@ -169,12 +169,12 @@ local function main() -- PLC init
--- EVENT_CONSUMER: this function consumes events local function init() - -- just booting up, no fission allowed (neutrons stay put thanks) - if (not plc_state.no_reactor) and plc_state.reactor_formed and smem_dev.reactor.getStatus() then + -- scram on boot if networked, otherwise leave the reactor be + if __shared_memory.networked and (not plc_state.no_reactor) and plc_state.reactor_formed and smem_dev.reactor.getStatus() then smem_dev.reactor.scram() end - -- front panel time! + -- setup front panel if not renderer.ui_ready() then local message plc_state.fp_ok, message = renderer.try_start_ui(config.FrontPanelTheme, config.ColorMode) From 3b2fb0028515a475850e6df46c9a2aa7dd947dfa Mon Sep 17 00:00:00 2001 From: Mikayla Fischler Date: Sun, 9 Feb 2025 13:37:22 -0500 Subject: [PATCH 09/11] cleanup --- coordinator/startup.lua | 2 +- supervisor/facility.lua | 4 ++-- supervisor/session/coordinator.lua | 10 +++++----- supervisor/session/svsessions.lua | 3 +-- supervisor/supervisor.lua | 5 ++++- 5 files changed, 13 insertions(+), 11 deletions(-) diff --git a/coordinator/startup.lua b/coordinator/startup.lua index d3413e3..a7209e8 100644 --- a/coordinator/startup.lua +++ b/coordinator/startup.lua @@ -19,7 +19,7 @@ local renderer = require("coordinator.renderer") local sounder = require("coordinator.sounder") local threads = require("coordinator.threads") -local COORDINATOR_VERSION = "v1.6.4" +local COORDINATOR_VERSION = "v1.6.5" local CHUNK_LOAD_DELAY_S = 30.0 diff --git a/supervisor/facility.lua b/supervisor/facility.lua index 70b7ad0..a4fb432 100644 --- a/supervisor/facility.lua +++ b/supervisor/facility.lua @@ -79,7 +79,7 @@ function facility.new(config) io_ctl = nil, ---@type rs_controller -- process control recovery = RCV_STATE.INACTIVE, ---@type RECOVERY_STATE - recovery_boot_state = nil, ---@type sv_control_state|nil + recovery_boot_state = nil, ---@type sv_boot_state|nil last_unit_states = {}, ---@type boolean[] units_ready = false, mode = PROCESS.INACTIVE, ---@type 
PROCESS @@ -403,7 +403,7 @@ function facility.new(config) end -- initialize startup recovery - ---@param state sv_control_state|nil + ---@param state sv_boot_state|nil function public.startup_recovery_init(state) if self.recovery == RCV_STATE.INACTIVE and state then self.recovery_boot_state = state diff --git a/supervisor/session/coordinator.lua b/supervisor/session/coordinator.lua index a606ada..aeabc72 100644 --- a/supervisor/session/coordinator.lua +++ b/supervisor/session/coordinator.lua @@ -240,11 +240,11 @@ function coordinator.new_session(id, s_addr, i_seq_num, in_queue, out_queue, tim ---@type start_auto_config local config = { - mode = pkt.data[1], ---@type PROCESS - burn_target = pkt.data[2], ---@type number - charge_target = pkt.data[3], ---@type number - gen_target = pkt.data[4], ---@type number - limits = pkt.data[5] ---@type number[] + mode = pkt.data[1], + burn_target = pkt.data[2], + charge_target = pkt.data[3], + gen_target = pkt.data[4], + limits = pkt.data[5] } facility.startup_recovery_start(config) diff --git a/supervisor/session/svsessions.lua b/supervisor/session/svsessions.lua index 59d8e70..087fe19 100644 --- a/supervisor/session/svsessions.lua +++ b/supervisor/session/svsessions.lua @@ -392,9 +392,8 @@ function svsessions.init(nic, fp_ok, config, facility) conns.tanks[1] = true end - self.dev_dbg.connected.units[i] = conns - self.plc_ini_reset[i] = true + self.dev_dbg.connected.units[i] = conns end end diff --git a/supervisor/supervisor.lua b/supervisor/supervisor.lua index 4df3bc5..47ee6d2 100644 --- a/supervisor/supervisor.lua +++ b/supervisor/supervisor.lua @@ -19,11 +19,14 @@ local config = {} supervisor.config = config +-- control state from last unexpected shutdown +supervisor.boot_state = nil ---@type sv_boot_state|nil + -- load the supervisor configuration and startup state function supervisor.load_config() if not settings.load("/supervisor.settings") then return false end - ---@class sv_control_state + ---@class sv_boot_state 
local boot_state = { mode = settings.get("LastProcessState"), ---@type PROCESS unit_states = settings.get("LastUnitStates") ---@type boolean[] From ecdaf78ed01aa385a8bd6f9dbac281dd4ea58560 Mon Sep 17 00:00:00 2001 From: Mikayla Fischler Date: Sun, 9 Feb 2025 13:48:20 -0500 Subject: [PATCH 10/11] #589 moved boot recovery to facility update file --- supervisor/facility.lua | 46 +++--------------------- supervisor/facility_update.lua | 66 ++++++++++++++++++++++++++++++---- 2 files changed, 63 insertions(+), 49 deletions(-) diff --git a/supervisor/facility.lua b/supervisor/facility.lua index a4fb432..de13c9b 100644 --- a/supervisor/facility.lua +++ b/supervisor/facility.lua @@ -5,7 +5,6 @@ local util = require("scada-common.util") local unit = require("supervisor.unit") local fac_update = require("supervisor.facility_update") -local plc = require("supervisor.session.plc") local rsctl = require("supervisor.session.rsctl") local svsessions = require("supervisor.session.svsessions") @@ -53,7 +52,7 @@ function facility.new(config) ---@class _facility_self local self = { units = {}, ---@type reactor_unit[] - types = { AUTO_SCRAM = AUTO_SCRAM, START_STATUS = START_STATUS }, + types = { AUTO_SCRAM = AUTO_SCRAM, START_STATUS = START_STATUS, RCV_STATE = RCV_STATE }, status_text = { "START UP", "initializing..." 
}, all_sys_ok = false, allow_testing = false, @@ -177,6 +176,7 @@ function facility.new(config) -- PRIVATE FUNCTIONS -- + -- check an auto process control configuration and save it if it's valid (does not start the process) ---@param auto_cfg start_auto_config configuration ---@return boolean ready, number[] unit_limits local function _auto_check_and_save(auto_cfg) @@ -319,46 +319,8 @@ function facility.new(config) -- update (iterate) the facility management function public.update() - -- attempt reboot recovery if in progress - if self.recovery == RCV_STATE.RUNNING then - -- try to start auto control - if self.recovery_boot_state.mode ~= nil and self.units_ready then - if self.recovery_boot_state.mode ~= PROCESS.INACTIVE and self.recovery_boot_state.mode ~= PROCESS.SYSTEM_ALARM_IDLE then - self.mode = self.mode_set - log.info("FAC: process startup resume initiated") - end - - self.recovery_boot_state.mode = nil - end - - local recovered = self.recovery_boot_state.mode == nil or self.recovery_boot_state.mode == PROCESS.INACTIVE - - -- restore manual control reactors - for i = 1, #self.units do - local u = self.units[i] - - if self.recovery_boot_state.unit_states[i] and self.group_map[i] == AUTO_GROUP.MANUAL then - recovered = false - - if u.get_control_inf().ready then - local plc_s = svsessions.get_reactor_session(i) - if plc_s ~= nil then - plc_s.in_queue.push_command(plc.PLC_S_CMDS.ENABLE) - log.info("FAC: startup resume enabling manually controlled reactor unit #" .. 
i) - - -- only execute once - self.recovery_boot_state.unit_states[i] = nil - end - end - end - end - - if recovered then - self.recovery = RCV_STATE.STOPPED - self.recovery_boot_state = nil - log.info("FAC: startup resume sequence completed") - end - end + -- run reboot recovery routine if needed + f_update.boot_recovery() -- run process control and evaluate automatic SCRAM f_update.pre_auto() diff --git a/supervisor/facility_update.lua b/supervisor/facility_update.lua index f951661..738662a 100644 --- a/supervisor/facility_update.lua +++ b/supervisor/facility_update.lua @@ -1,17 +1,21 @@ -local audio = require("scada-common.audio") -local const = require("scada-common.constants") -local log = require("scada-common.log") -local rsio = require("scada-common.rsio") -local types = require("scada-common.types") -local util = require("scada-common.util") +local audio = require("scada-common.audio") +local const = require("scada-common.constants") +local log = require("scada-common.log") +local rsio = require("scada-common.rsio") +local types = require("scada-common.types") +local util = require("scada-common.util") -local qtypes = require("supervisor.session.rtu.qtypes") +local plc = require("supervisor.session.plc") +local svsessions = require("supervisor.session.svsessions") + +local qtypes = require("supervisor.session.rtu.qtypes") local TONE = audio.TONE local ALARM = types.ALARM local PRIO = types.ALARM_PRIORITY local ALARM_STATE = types.ALARM_STATE +local AUTO_GROUP = types.AUTO_GROUP local CONTAINER_MODE = types.CONTAINER_MODE local PROCESS = types.PROCESS local PROCESS_NAMES = types.PROCESS_NAMES @@ -131,6 +135,54 @@ end --#region PUBLIC FUNCTIONS +-- run reboot recovery routine if needed +function update.boot_recovery() + local RCV_STATE = self.types.RCV_STATE + + -- attempt reboot recovery if in progress + if self.recovery == RCV_STATE.RUNNING then + local was_inactive = self.recovery_boot_state.mode == PROCESS.INACTIVE or self.recovery_boot_state.mode == 
PROCESS.SYSTEM_ALARM_IDLE + + -- try to start auto control + if self.recovery_boot_state.mode ~= nil and self.units_ready then + if was_inactive then + self.mode = self.mode_set + log.info("FAC: process startup resume initiated") + end + + self.recovery_boot_state.mode = nil + end + + local recovered = self.recovery_boot_state.mode == nil or was_inactive + + -- restore manual control reactors + for i = 1, #self.units do + local u = self.units[i] + + if self.recovery_boot_state.unit_states[i] and self.group_map[i] == AUTO_GROUP.MANUAL then + recovered = false + + if u.get_control_inf().ready then + local plc_s = svsessions.get_reactor_session(i) + if plc_s ~= nil then + plc_s.in_queue.push_command(plc.PLC_S_CMDS.ENABLE) + log.info("FAC: startup resume enabling manually controlled reactor unit #" .. i) + + -- only execute once + self.recovery_boot_state.unit_states[i] = nil + end + end + end + end + + if recovered then + self.recovery = RCV_STATE.STOPPED + self.recovery_boot_state = nil + log.info("FAC: startup resume sequence completed") + end + end +end + -- automatic control pre-update logic function update.pre_auto() -- unlink RTU sessions if they are closed From 5f22069ce1dcd30e0c21c45712321118abcefe71 Mon Sep 17 00:00:00 2001 From: Mikayla Fischler Date: Sun, 9 Feb 2025 14:19:06 -0500 Subject: [PATCH 11/11] #589 cleanup and fixes --- supervisor/facility.lua | 8 ++++---- supervisor/facility_update.lua | 2 +- supervisor/session/coordinator.lua | 2 +- supervisor/startup.lua | 2 +- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/supervisor/facility.lua b/supervisor/facility.lua index de13c9b..08ece5f 100644 --- a/supervisor/facility.lua +++ b/supervisor/facility.lua @@ -364,9 +364,9 @@ function facility.new(config) end end - -- initialize startup recovery + -- initialize facility resume boot recovery ---@param state sv_boot_state|nil - function public.startup_recovery_init(state) + function public.boot_recovery_init(state) if self.recovery == 
RCV_STATE.INACTIVE and state then self.recovery_boot_state = state self.recovery = RCV_STATE.PRIMED @@ -374,9 +374,9 @@ function facility.new(config) end end - -- attempt startup recovery + -- attempt facility resume boot recovery ---@param auto_cfg start_auto_config configuration - function public.startup_recovery_start(auto_cfg) + function public.boot_recovery_start(auto_cfg) if self.recovery == RCV_STATE.PRIMED then self.recovery = util.trinary(_auto_check_and_save(auto_cfg), RCV_STATE.RUNNING, RCV_STATE.STOPPED) log.info(util.c("FAC: startup resume ", util.trinary(self.recovery == RCV_STATE.RUNNING, "started", "failed"))) diff --git a/supervisor/facility_update.lua b/supervisor/facility_update.lua index 738662a..a127f81 100644 --- a/supervisor/facility_update.lua +++ b/supervisor/facility_update.lua @@ -145,7 +145,7 @@ function update.boot_recovery() -- try to start auto control if self.recovery_boot_state.mode ~= nil and self.units_ready then - if was_inactive then + if not was_inactive then self.mode = self.mode_set log.info("FAC: process startup resume initiated") end diff --git a/supervisor/session/coordinator.lua b/supervisor/session/coordinator.lua index aeabc72..4887f0f 100644 --- a/supervisor/session/coordinator.lua +++ b/supervisor/session/coordinator.lua @@ -247,7 +247,7 @@ function coordinator.new_session(id, s_addr, i_seq_num, in_queue, out_queue, tim limits = pkt.data[5] } - facility.startup_recovery_start(config) + facility.boot_recovery_start(config) else log.debug(log_tag .. "CRDN process ready packet length mismatch") end diff --git a/supervisor/startup.lua b/supervisor/startup.lua index 0b5c1e6..f54144b 100644 --- a/supervisor/startup.lua +++ b/supervisor/startup.lua @@ -148,7 +148,7 @@ local function main() local heartbeat_toggle = true -- init startup recovery - sv_facility.startup_recovery_init(supervisor.boot_state) + sv_facility.boot_recovery_init(supervisor.boot_state) -- event loop while true do