From b3cf40a01afc5d7e777fab8a1667595b00fb4b20 Mon Sep 17 00:00:00 2001 From: Mikayla Date: Sat, 8 Feb 2025 20:35:04 +0000 Subject: [PATCH] #589 initial attempt at reboot recovery --- coordinator/coordinator.lua | 12 ++ coordinator/process.lua | 5 + scada-common/comms.lua | 27 ++-- supervisor/facility.lua | 214 +++++++++++++++++++++-------- supervisor/facility_update.lua | 27 +++- supervisor/session/coordinator.lua | 42 +++++- supervisor/session/plc.lua | 1 - supervisor/startup.lua | 3 + supervisor/supervisor.lua | 13 +- supervisor/unit.lua | 6 + 10 files changed, 266 insertions(+), 84 deletions(-) diff --git a/coordinator/coordinator.lua b/coordinator/coordinator.lua index 36163eb..c0ba2f3 100644 --- a/coordinator/coordinator.lua +++ b/coordinator/coordinator.lua @@ -380,6 +380,18 @@ function coordinator.comms(version, nic, sv_watchdog) _send_sv(PROTOCOL.SCADA_MGMT, MGMT_TYPE.CLOSE, {}) end + -- send the resume ready state to the supervisor + ---@param mode PROCESS process control mode + ---@param burn_target number burn rate target + ---@param charge_target number charge level target + ---@param gen_target number generation rate target + ---@param limits number[] unit burn rate limits + function public.send_ready(mode, burn_target, charge_target, gen_target, limits) + _send_sv(PROTOCOL.SCADA_CRDN, CRDN_TYPE.PROCESS_READY, { + mode, burn_target, charge_target, gen_target, limits + }) + end + -- send a facility command ---@param cmd FAC_COMMAND command ---@param option any? 
optional option options for the optional options (like waste mode) diff --git a/coordinator/process.lua b/coordinator/process.lua index 1866686..0fc7c1a 100644 --- a/coordinator/process.lua +++ b/coordinator/process.lua @@ -139,6 +139,11 @@ function process.init(iocontrol, coord_comms) log.info("PROCESS: loaded priority groups settings") end + + -- report to the supervisor all initial configuration data has been sent + -- startup resume can occur if needed + local p = ctl_proc + pctl.comms.send_ready(p.mode, p.burn_target, p.charge_target, p.gen_target, p.limits) end -- create a handle to process control for usage of commands that get acknowledgements diff --git a/scada-common/comms.lua b/scada-common/comms.lua index a8d0014..48cf416 100644 --- a/scada-common/comms.lua +++ b/scada-common/comms.lua @@ -17,7 +17,7 @@ local max_distance = nil local comms = {} -- protocol/data versions (protocol/data independent changes tracked by util.lua version) -comms.version = "3.0.4" +comms.version = "3.0.5" comms.api_version = "0.0.9" ---@enum PROTOCOL @@ -60,18 +60,19 @@ local MGMT_TYPE = { ---@enum CRDN_TYPE local CRDN_TYPE = { INITIAL_BUILDS = 0, -- initial, complete builds packet to the coordinator - FAC_BUILDS = 1, -- facility RTU builds - FAC_STATUS = 2, -- state of facility and facility devices - FAC_CMD = 3, -- faility command - UNIT_BUILDS = 4, -- build of each reactor unit (reactor + RTUs) - UNIT_STATUSES = 5, -- state of each of the reactor units - UNIT_CMD = 6, -- command a reactor unit - API_GET_FAC = 7, -- API: get the facility general data - API_GET_FAC_DTL = 8, -- API: get (detailed) data for the facility app - API_GET_UNIT = 9, -- API: get reactor unit data - API_GET_CTRL = 10, -- API: get data for the control app - API_GET_PROC = 11, -- API: get data for the process app - API_GET_WASTE = 12 -- API: get data for the waste app + PROCESS_READY = 1, -- process init is complete + last set of info for supervisor startup recovery + FAC_BUILDS = 2, -- facility RTU 
builds + FAC_STATUS = 3, -- state of facility and facility devices + FAC_CMD = 4, -- faility command + UNIT_BUILDS = 5, -- build of each reactor unit (reactor + RTUs) + UNIT_STATUSES = 6, -- state of each of the reactor units + UNIT_CMD = 7, -- command a reactor unit + API_GET_FAC = 8, -- API: get the facility general data + API_GET_FAC_DTL = 9, -- API: get (detailed) data for the facility app + API_GET_UNIT = 10, -- API: get reactor unit data + API_GET_CTRL = 11, -- API: get data for the control app + API_GET_PROC = 12, -- API: get data for the process app + API_GET_WASTE = 13 -- API: get data for the waste app } ---@enum ESTABLISH_ACK diff --git a/supervisor/facility.lua b/supervisor/facility.lua index e47842f..6525d84 100644 --- a/supervisor/facility.lua +++ b/supervisor/facility.lua @@ -5,6 +5,7 @@ local util = require("scada-common.util") local unit = require("supervisor.unit") local fac_update = require("supervisor.facility_update") +local plc = require("supervisor.session.plc") local rsctl = require("supervisor.session.rsctl") local svsessions = require("supervisor.session.svsessions") @@ -31,6 +32,17 @@ local START_STATUS = { BLADE_MISMATCH = 2 } +---@enum RECOVERY_STATE +local RCV_STATE = { + INACTIVE = 0, + PRIMED = 1, + RUNNING = 2, + STOPPED = 3 +} + +local CHARGE_SCALER = 1000000 -- convert MFE to FE +local GEN_SCALER = 1000 -- convert kFE to FE + ---@class facility_management local facility = {} @@ -66,12 +78,15 @@ function facility.new(config) -- redstone I/O control io_ctl = nil, ---@type rs_controller -- process control + recovery = RCV_STATE.INACTIVE, ---@type RECOVERY_STATE + recovery_boot_state = nil, ---@type sv_control_state|nil + last_unit_states = {}, ---@type boolean[] last saved control state per unit (must be a table: filled via table.insert during unit creation) units_ready = false, - mode = PROCESS.INACTIVE, - last_mode = PROCESS.INACTIVE, - return_mode = PROCESS.INACTIVE, - mode_set = PROCESS.MAX_BURN, - start_fail = START_STATUS.OK, + mode = PROCESS.INACTIVE, ---@type PROCESS + last_mode = PROCESS.INACTIVE, ---@type PROCESS +
return_mode = PROCESS.INACTIVE, ---@type PROCESS + mode_set = PROCESS.MAX_BURN, ---@type PROCESS + start_fail = START_STATUS.OK, ---@type START_STATUS max_burn_combined = 0.0, -- maximum burn rate to clamp at burn_target = 0.1, -- burn rate target for aggregate burn mode charge_setpoint = 0, -- FE charge target setpoint @@ -101,8 +116,8 @@ function facility.new(config) last_error = 0.0, last_time = 0.0, -- waste processing - waste_product = WASTE.PLUTONIUM, - current_waste_product = WASTE.PLUTONIUM, + waste_product = WASTE.PLUTONIUM, ---@type WASTE_PRODUCT + current_waste_product = WASTE.PLUTONIUM, ---@type WASTE_PRODUCT pu_fallback = false, sps_low_power = false, disabled_sps = false, @@ -126,14 +141,16 @@ function facility.new(config) imtx_faulted_times = { 0, 0, 0 } } + --#region SETUP + -- provide self to facility update functions local f_update = fac_update(self) -- create units for i = 1, config.UnitCount do - table.insert(self.units, - unit.new(i, self.cooling_conf.r_cool[i].BoilerCount, self.cooling_conf.r_cool[i].TurbineCount, config.ExtChargeIdling)) + table.insert(self.units, unit.new(i, self.cooling_conf.r_cool[i].BoilerCount, self.cooling_conf.r_cool[i].TurbineCount, config.ExtChargeIdling)) table.insert(self.group_map, AUTO_GROUP.MANUAL) + table.insert(self.last_unit_states, false) end -- list for RTU session management @@ -149,6 +166,62 @@ function facility.new(config) table.insert(self.test_tone_states, false) end + --#endregion + + -- PRIVATE FUNCTIONS -- + + ---@param auto_cfg start_auto_config configuration + ---@return boolean ready, number[] unit_limits + local function _auto_check_and_save(auto_cfg) + local ready = false + + -- load up current limits + local limits = {} + for i = 1, config.UnitCount do + limits[i] = self.units[i].get_control_inf().lim_br100 * 100 + end + + -- only allow changes if not running + if self.mode == PROCESS.INACTIVE then + if (type(auto_cfg.mode) == "number") and (auto_cfg.mode > PROCESS.INACTIVE) and (auto_cfg.mode 
<= PROCESS.GEN_RATE) then + self.mode_set = auto_cfg.mode + end + + if (type(auto_cfg.burn_target) == "number") and auto_cfg.burn_target >= 0.1 then + self.burn_target = auto_cfg.burn_target + end + + if (type(auto_cfg.charge_target) == "number") and auto_cfg.charge_target >= 0 then + self.charge_setpoint = auto_cfg.charge_target * CHARGE_SCALER + end + + if (type(auto_cfg.gen_target) == "number") and auto_cfg.gen_target >= 0 then + self.gen_rate_setpoint = auto_cfg.gen_target * GEN_SCALER + end + + if (type(auto_cfg.limits) == "table") and (#auto_cfg.limits == config.UnitCount) then + for i = 1, config.UnitCount do + local limit = auto_cfg.limits[i] + + if (type(limit) == "number") and (limit >= 0.1) then + limits[i] = limit + self.units[i].set_burn_limit(limit) + end + end + end + + ready = self.mode_set > 0 + + if ((self.mode_set == PROCESS.CHARGE) and (self.charge_setpoint <= 0)) or + ((self.mode_set == PROCESS.GEN_RATE) and (self.gen_rate_setpoint <= 0)) or + ((self.mode_set == PROCESS.BURN_RATE) and (self.burn_target < 0.1)) then + ready = false + end + end + + return ready, limits + end + -- PUBLIC FUNCTIONS -- ---@class facility @@ -239,6 +312,42 @@ function facility.new(config) -- update (iterate) the facility management function public.update() + -- attempt reboot recovery if in progress + if self.recovery == RCV_STATE.RUNNING then + -- try to start auto control + if self.recovery_boot_state.mode ~= nil and self.units_ready then + self.recovery_boot_state.mode = nil + self.mode = self.mode_set + log.info("FAC: process startup resume initiated") + end + + local recovered = self.recovery_boot_state.mode == nil + + -- restore manual control reactors + for i = 1, #self.units do + if self.recovery_boot_state.unit_states[i] and self.group_map[i] == AUTO_GROUP.MANUAL then + recovered = false + + if self.units[i].get_control_inf().ready then + local plc_s = svsessions.get_reactor_session(i) + if plc_s ~= nil then + 
plc_s.in_queue.push_command(plc.PLC_S_CMDS.ENABLE) + log.info("FAC: startup resume enabling manually controlled reactor unit #" .. i) + + -- only execute once + self.recovery_boot_state.unit_states[i] = nil + end + end + end + end + + if recovered then + self.recovery = RCV_STATE.STOPPED + self.recovery_boot_state = nil + log.info("FAC: startup resume complete") + end + end + -- run process control and evaluate automatic SCRAM f_update.pre_auto() f_update.auto_control(config.ExtChargeIdling) @@ -267,6 +376,35 @@ function facility.new(config) --#endregion + --#region Startup Recovery + + ---@param state sv_control_state + function public.startup_recovery_init(state) + if self.recovery == RCV_STATE.INACTIVE then + self.recovery_boot_state = state + self.recovery = RCV_STATE.PRIMED + end + end + + -- attempt startup recovery + ---@param auto_cfg start_auto_config configuration + function public.startup_recovery_start(auto_cfg) + if self.recovery == RCV_STATE.PRIMED and self.recovery_boot_state and + self.recovery_boot_state.mode ~= PROCESS.INACTIVE and self.recovery_boot_state.mode ~= PROCESS.SYSTEM_ALARM_IDLE then + self.recovery = util.trinary(_auto_check_and_save(auto_cfg), RCV_STATE.RUNNING, RCV_STATE.STOPPED) + log.info(util.c("FAC: startup resume ", util.trinary(self.recovery == RCV_STATE.RUNNING, "ready", "failed"))) + else self.recovery = RCV_STATE.STOPPED end + end + + -- used on certain coordinator commands to end reboot recovery (remain in current operational state); called on every such command, so only log when a recovery was actually pending + function public.cancel_recovery() + if self.recovery_boot_state ~= nil and self.recovery ~= RCV_STATE.STOPPED then log.info("FAC: process startup resume cancelled by user operation") end + self.recovery = RCV_STATE.STOPPED + self.recovery_boot_state = nil + end + + --#endregion + --#region Commands -- SCRAM all reactor units @@ -290,59 +428,13 @@ function facility.new(config) function public.auto_stop() self.mode = PROCESS.INACTIVE end -- set automatic control configuration and start the process - ---@param auto_cfg
start_auto_config configuration ---@return table response ready state (successfully started) and current configuration (after updating) function public.auto_start(auto_cfg) - local charge_scaler = 1000000 -- convert MFE to FE - local gen_scaler = 1000 -- convert kFE to FE - local ready = false + local ready, limits = _auto_check_and_save(auto_cfg) - -- load up current limits - local limits = {} - for i = 1, config.UnitCount do - limits[i] = self.units[i].get_control_inf().lim_br100 * 100 - end - - -- only allow changes if not running - if self.mode == PROCESS.INACTIVE then - if (type(auto_cfg.mode) == "number") and (auto_cfg.mode > PROCESS.INACTIVE) and (auto_cfg.mode <= PROCESS.GEN_RATE) then - self.mode_set = auto_cfg.mode - end - - if (type(auto_cfg.burn_target) == "number") and auto_cfg.burn_target >= 0.1 then - self.burn_target = auto_cfg.burn_target - end - - if (type(auto_cfg.charge_target) == "number") and auto_cfg.charge_target >= 0 then - self.charge_setpoint = auto_cfg.charge_target * charge_scaler - end - - if (type(auto_cfg.gen_target) == "number") and auto_cfg.gen_target >= 0 then - self.gen_rate_setpoint = auto_cfg.gen_target * gen_scaler - end - - if (type(auto_cfg.limits) == "table") and (#auto_cfg.limits == config.UnitCount) then - for i = 1, config.UnitCount do - local limit = auto_cfg.limits[i] - - if (type(limit) == "number") and (limit >= 0.1) then - limits[i] = limit - self.units[i].set_burn_limit(limit) - end - end - end - - ready = self.mode_set > 0 - - if ((self.mode_set == PROCESS.CHARGE) and (self.charge_setpoint <= 0)) or - ((self.mode_set == PROCESS.GEN_RATE) and (self.gen_rate_setpoint <= 0)) or - ((self.mode_set == PROCESS.BURN_RATE) and (self.burn_target < 0.1)) then - ready = false - end - - ready = ready and self.units_ready - - if ready then self.mode = self.mode_set end + ready = ready and self.units_ready -- a start is only accepted when all units are ready, so the log and reply below report the real outcome + if ready then self.mode = self.mode_set end log.debug(util.c("FAC: process start ", util.trinary(ready, "accepted", "rejected"))) @@
-351,8 +443,8 @@ function facility.new(config) ready, self.mode_set, self.burn_target, - self.charge_setpoint / charge_scaler, - self.gen_rate_setpoint / gen_scaler, + self.charge_setpoint / CHARGE_SCALER, + self.gen_rate_setpoint / GEN_SCALER, limits } end diff --git a/supervisor/facility_update.lua b/supervisor/facility_update.lua index 5e6fa07..9526596 100644 --- a/supervisor/facility_update.lua +++ b/supervisor/facility_update.lua @@ -650,8 +650,16 @@ function update.auto_safety() end end --- update last mode and set next mode +-- update last mode, set next mode, and update saved state as needed function update.post_auto() + if self.mode ~= next_mode then + settings.set("LastProcessState", next_mode) + local saved = settings.save("/supervisor.settings") + if not saved then + log.warning("facility_update.post_auto(): failed to save supervisor settings file") + end + end + self.last_mode = self.mode self.mode = next_mode end @@ -792,6 +800,7 @@ end function update.unit_mgmt() local insufficent_po_rate = false local need_emcool = false + local write_state = false for i = 1, #self.units do local u = self.units[i] @@ -807,6 +816,22 @@ function update.unit_mgmt() if (self.cooling_conf.fac_tank_mode > 0) and u.is_emer_cool_tripped() and (self.cooling_conf.fac_tank_defs[i] == 2) then need_emcool = true end + + -- check for control state changes to save + if self.last_unit_states[i] ~= u.get_control_state() then + self.last_unit_states[i] = u.get_control_state() + write_state = true + end + end + + -- record unit control states + + if write_state then + settings.set("LastUnitStates", self.last_unit_states) + local saved = settings.save("/supervisor.settings") + if not saved then + log.warning("facility_update.unit_mgmt(): failed to save supervisor settings file") + end end -- update waste product diff --git a/supervisor/session/coordinator.lua b/supervisor/session/coordinator.lua index b011d44..a606ada 100644 --- a/supervisor/session/coordinator.lua +++ 
b/supervisor/session/coordinator.lua @@ -234,6 +234,23 @@ function coordinator.new_session(id, s_addr, i_seq_num, in_queue, out_queue, tim if pkt.type == CRDN_TYPE.INITIAL_BUILDS then -- acknowledgement to coordinator receiving builds self.acks.builds = true + elseif pkt.type == CRDN_TYPE.PROCESS_READY then + if pkt.length == 5 then + -- coordinator has sent all initial process data, power-on recovery is now possible + + ---@type start_auto_config + local config = { + mode = pkt.data[1], ---@type PROCESS + burn_target = pkt.data[2], ---@type number + charge_target = pkt.data[3], ---@type number + gen_target = pkt.data[4], ---@type number + limits = pkt.data[5] ---@type number[] + } + + facility.startup_recovery_start(config) + else + log.debug(log_tag .. "CRDN process ready packet length mismatch") + end elseif pkt.type == CRDN_TYPE.FAC_BUILDS then -- acknowledgement to coordinator receiving builds self.acks.fac_builds = true @@ -243,8 +260,11 @@ function coordinator.new_session(id, s_addr, i_seq_num, in_queue, out_queue, tim if cmd == FAC_COMMAND.SCRAM_ALL then facility.scram_all() + facility.cancel_recovery() _send(CRDN_TYPE.FAC_CMD, { cmd, true }) elseif cmd == FAC_COMMAND.STOP then + facility.cancel_recovery() + local was_active = facility.auto_is_active() if was_active then @@ -253,15 +273,16 @@ function coordinator.new_session(id, s_addr, i_seq_num, in_queue, out_queue, tim _send(CRDN_TYPE.FAC_CMD, { cmd, was_active }) elseif cmd == FAC_COMMAND.START then + facility.cancel_recovery() + if pkt.length == 6 then - ---@type sys_auto_config ----@diagnostic disable-next-line: missing-fields + ---@class start_auto_config local config = { - mode = pkt.data[2], - burn_target = pkt.data[3], - charge_target = pkt.data[4], - gen_target = pkt.data[5], - limits = pkt.data[6] + mode = pkt.data[2], ---@type PROCESS + burn_target = pkt.data[3], ---@type number + charge_target = pkt.data[4], ---@type number + gen_target = pkt.data[5], ---@type number + limits = pkt.data[6] 
---@type number[] } _send(CRDN_TYPE.FAC_CMD, { cmd, table.unpack(facility.auto_start(config)) }) @@ -313,8 +334,11 @@ function coordinator.new_session(id, s_addr, i_seq_num, in_queue, out_queue, tim local manual = facility.get_group(uid) == AUTO_GROUP.MANUAL if cmd == UNIT_COMMAND.SCRAM then + facility.cancel_recovery() out_queue.push_data(SV_Q_DATA.SCRAM, data) elseif cmd == UNIT_COMMAND.START then + facility.cancel_recovery() + if manual then out_queue.push_data(SV_Q_DATA.START, data) else @@ -324,6 +348,8 @@ function coordinator.new_session(id, s_addr, i_seq_num, in_queue, out_queue, tim elseif cmd == UNIT_COMMAND.RESET_RPS then out_queue.push_data(SV_Q_DATA.RESET_RPS, data) elseif cmd == UNIT_COMMAND.SET_BURN then + facility.cancel_recovery() + if pkt.length == 3 then if manual then out_queue.push_data(SV_Q_DATA.SET_BURN, data) @@ -354,6 +380,8 @@ function coordinator.new_session(id, s_addr, i_seq_num, in_queue, out_queue, tim log.debug(log_tag .. "CRDN unit command reset alarm missing alarm id") end elseif cmd == UNIT_COMMAND.SET_GROUP then + facility.cancel_recovery() + if (pkt.length == 3) and (type(pkt.data[3]) == "number") and (pkt.data[3] >= AUTO_GROUP.MANUAL) and (pkt.data[3] <= AUTO_GROUP.BACKUP) then facility.set_group(unit.get_id(), pkt.data[3]) diff --git a/supervisor/session/plc.lua b/supervisor/session/plc.lua index bedbd8b..0217deb 100644 --- a/supervisor/session/plc.lua +++ b/supervisor/session/plc.lua @@ -61,7 +61,6 @@ function plc.new_session(id, s_addr, i_seq_num, reactor_id, in_queue, out_queue, local log_tag = "plc_session(" .. id .. 
"): " local self = { - commanded_state = false, commanded_burn_rate = 0.0, auto_cmd_token = 0, ramping_rate = false, diff --git a/supervisor/startup.lua b/supervisor/startup.lua index 32651da..c2686e0 100644 --- a/supervisor/startup.lua +++ b/supervisor/startup.lua @@ -147,6 +147,9 @@ local function main() -- halve the rate heartbeat LED flash local heartbeat_toggle = true + -- init startup recovery + sv_facility.startup_recovery_init(supervisor.boot_state) + -- event loop while true do local event, param1, param2, param3, param4, param5 = util.pull_event() diff --git a/supervisor/supervisor.lua b/supervisor/supervisor.lua index fe7b011..4df3bc5 100644 --- a/supervisor/supervisor.lua +++ b/supervisor/supervisor.lua @@ -19,10 +19,21 @@ local config = {} supervisor.config = config --- load the supervisor configuration +-- load the supervisor configuration and startup state function supervisor.load_config() if not settings.load("/supervisor.settings") then return false end + ---@class sv_control_state + local boot_state = { + mode = settings.get("LastProcessState"), ---@type PROCESS + unit_states = settings.get("LastUnitStates") ---@type boolean[] + } + + -- only record boot state if likely valid + if type(boot_state.mode) == "number" and type(boot_state.unit_states) == "table" then + supervisor.boot_state = boot_state + end + config.UnitCount = settings.get("UnitCount") config.CoolingConfig = settings.get("CoolingConfig") config.FacilityTankMode = settings.get("FacilityTankMode") diff --git a/supervisor/unit.lua b/supervisor/unit.lua index dc59cff..8bbac07 100644 --- a/supervisor/unit.lua +++ b/supervisor/unit.lua @@ -917,6 +917,12 @@ function unit.new(reactor_id, num_boilers, num_turbines, ext_idle) return status end + -- check the commanded control state of the reactor (if connected) + ---@nodiscard + function public.get_control_state() + if self.plc_i ~= nil then return self.plc_i.get_db().control_state else return false end + end + -- get the current burn rate 
(actual rate) ---@nodiscard function public.get_burn_rate()