Compare commits

...

5 Commits

Author SHA1 Message Date
Robert Hensing
110a9ef105 Add rl-next/c-api-recoverable-errors 2025-09-10 12:49:41 +02:00
Robert Hensing
6bc358ba38 libexpr: Add recoverable errors
This provides an explicit API for call-fail-retry-succeed evaluation
flows, such as currently used in NixOps4.

An alternative design would simply reset the `Value` to the original
thunk instead of `tFailed` under the condition of catching a
`RecoverableEvalError`.
That is somewhat simpler, but I believe the presence of `tFailed` is
beneficial for possible use in the repl; being able to show the error
sooner, without re-evaluation.

The hasPos method and isEmpty function are required in order to avoid
an include loop.
2025-09-10 12:49:41 +02:00
Robert Hensing
9aa3cc9c8f libexpr: Call destructor on exception_ptr
This uses `gc_cleanup` to call the exception_ptr destructor when the
`Failed` is collected.
I don't know exactly how bad it is to deallocate `std::exception_ptr`
without destruction, but I guess it ranges from small leak to UB and crash.
2025-09-10 12:49:41 +02:00
Robert Hensing
6de4db100f Add tests/f/lang/eval-okay-tryeval-failed-thunk-reeval 2025-09-10 12:49:41 +02:00
Eelco Dolstra
b13143280c Introduce a "failed" value type
In the multithreaded evaluator, it's possible for multiple threads to
wait on the same thunk. If evaluation of the thunk results in an
exception, the waiting threads shouldn't try to re-force the thunk.
Instead, they should rethrow the same exception, without duplicating
any work.

Therefore, there is now a new value type `tFailed` that stores an
std::exception_ptr. If evaluation of a thunk/app results in an
exception, `forceValue()` overwrites the value with a `tFailed`. If
`forceValue()` encounters a `tFailed`, it rethrows the exception. So
you normally never need to check for failed values (since forcing them
causes a rethrow).
2025-09-10 12:49:41 +02:00
21 changed files with 347 additions and 17 deletions

View File

@@ -0,0 +1,23 @@
---
synopsis: "C API: Errors returned from your primops are not treated as recoverable by default"
prs: [13930]
---
Nix 2.32 by default remembers the error in the thunk that triggered it.
Previously the following sequence of events worked:
1. Have a thunk that invokes a primop that's defined through the C API
2. The primop returns an error
3. Force the thunk again
4. The primop returns a value
5. The thunk evaluated successfully
**Resolution**
C API consumers that rely on this must change their recoverable error calls:
```diff
-nix_set_err_msg(context, NIX_ERR_*, msg);
+nix_set_err_msg(context, NIX_ERR_RECOVERABLE, msg);
```

View File

@@ -1,4 +1,5 @@
#include "nix/expr/attr-set.hh"
#include "nix/expr/eval-error.hh"
#include "nix/util/configuration.hh"
#include "nix/expr/eval.hh"
#include "nix/store/globals.hh"
@@ -89,8 +90,13 @@ static void nix_c_primop_wrapper(
f(userdata, &ctx, (EvalState *) &state, (nix_value **) args, (nix_value *) &vTmp);
if (ctx.last_err_code != NIX_OK) {
/* TODO: Throw different errors depending on the error code */
state.error<nix::EvalError>("Error from custom function: %s", *ctx.last_err).atPos(pos).debugThrow();
if (ctx.last_err_code == NIX_ERR_RECOVERABLE) {
state.error<nix::RecoverableEvalError>("Recoverable error from custom function: %s", *ctx.last_err)
.atPos(pos)
.debugThrow();
} else {
state.error<nix::EvalError>("Error from custom function: %s", *ctx.last_err).atPos(pos).debugThrow();
}
}
if (!vTmp.isValid()) {
@@ -177,6 +183,8 @@ ValueType nix_get_type(nix_c_context * context, const nix_value * value)
switch (v.type()) {
case nThunk:
return NIX_TYPE_THUNK;
case nFailed:
return NIX_TYPE_FAILED;
case nInt:
return NIX_TYPE_INT;
case nFloat:

View File

@@ -32,7 +32,8 @@ typedef enum {
NIX_TYPE_ATTRS,
NIX_TYPE_LIST,
NIX_TYPE_FUNCTION,
NIX_TYPE_EXTERNAL
NIX_TYPE_EXTERNAL,
NIX_TYPE_FAILED,
} ValueType;
// forward declarations

View File

@@ -437,4 +437,105 @@ TEST_F(nix_api_expr_test, nix_value_call_multi_no_args)
assert_ctx_ok();
ASSERT_EQ(3, rInt);
}
// The following is a test case for retryable thunks.
// This is a requirement for the current way in which NixOps4 evaluates its deployment expressions.
// An alternative strategy could be implemented, but unwinding the stack may be a more efficient way to deal with many
// suspensions/resumptions, compared to e.g. using a thread or coroutine stack for each suspended dependency. This test
// models the essential bits of a deployment tool that uses such a strategy.
// State for the retryable primop - simulates deployment resource availability
struct DeploymentResourceState
{
bool vm_created = false;
};
static void primop_load_resource_input(
void * user_data, nix_c_context * context, EvalState * state, nix_value ** args, nix_value * ret)
{
assert(context);
assert(state);
auto * resource_state = static_cast<DeploymentResourceState *>(user_data);
// Get the resource input name argument
std::string input_name;
if (nix_get_string(context, args[0], OBSERVE_STRING(input_name)) != NIX_OK)
return;
// Only handle "vm_id" input - throw for anything else
if (input_name != "vm_id") {
std::string error_msg = "unknown resource input: " + input_name;
nix_set_err_msg(context, NIX_ERR_NIX_ERROR, error_msg.c_str());
return;
}
if (resource_state->vm_created) {
// VM has been created, return the ID
nix_init_string(context, ret, "vm-12345");
} else {
// VM not created yet, fail with dependency error
nix_set_err_msg(context, NIX_ERR_RECOVERABLE, "VM not yet created");
}
}
TEST_F(nix_api_expr_test, nix_expr_thunk_re_evaluation_after_deployment)
{
// This test demonstrates NixOps4's requirement: a thunk calling a primop should be
// re-evaluable when deployment resources become available that were not available initially.
DeploymentResourceState resource_state;
PrimOp * primop = nix_alloc_primop(
ctx,
primop_load_resource_input,
1,
"loadResourceInput",
nullptr,
"load a deployment resource input",
&resource_state);
assert_ctx_ok();
nix_value * primopValue = nix_alloc_value(ctx, state);
assert_ctx_ok();
nix_init_primop(ctx, primopValue, primop);
assert_ctx_ok();
nix_value * inputName = nix_alloc_value(ctx, state);
assert_ctx_ok();
nix_init_string(ctx, inputName, "vm_id");
assert_ctx_ok();
// Create a single thunk by using nix_init_apply instead of nix_value_call
// This creates a lazy application that can be forced multiple times
nix_value * thunk = nix_alloc_value(ctx, state);
assert_ctx_ok();
nix_init_apply(ctx, thunk, primopValue, inputName);
assert_ctx_ok();
// First force: VM not created yet, should fail
nix_value_force(ctx, state, thunk);
ASSERT_EQ(NIX_ERR_NIX_ERROR, nix_err_code(ctx));
ASSERT_THAT(nix_err_msg(nullptr, ctx, nullptr), testing::HasSubstr("VM not yet created"));
// Clear the error context for the next attempt
nix_c_context_free(ctx);
ctx = nix_c_context_create();
// Simulate deployment process: VM gets created
resource_state.vm_created = true;
// Second force of the SAME thunk: this is where the "failed" value issue appears
// With failed value caching, this should fail because the thunk is marked as permanently failed
// Without failed value caching (or with retryable failures), this should succeed
nix_value_force(ctx, state, thunk);
// If we get here without error, the thunk was successfully re-evaluated
assert_ctx_ok();
std::string result;
nix_get_string(ctx, thunk, OBSERVE_STRING(result));
assert_ctx_ok();
ASSERT_STREQ("vm-12345", result.c_str());
}
} // namespace nixC

View File

@@ -113,5 +113,6 @@ template class EvalErrorBuilder<MissingArgumentError>;
template class EvalErrorBuilder<InfiniteRecursionError>;
template class EvalErrorBuilder<InvalidPathError>;
template class EvalErrorBuilder<IFDError>;
template class EvalErrorBuilder<RecoverableEvalError>;
} // namespace nix

View File

@@ -1,4 +1,5 @@
#include "nix/expr/eval.hh"
#include "nix/expr/eval-error.hh"
#include "nix/expr/eval-settings.hh"
#include "nix/expr/primops.hh"
#include "nix/expr/print-options.hh"
@@ -27,6 +28,7 @@
#include "parser-tab.hh"
#include <algorithm>
#include <exception>
#include <iostream>
#include <sstream>
#include <cstring>
@@ -124,6 +126,8 @@ std::string_view showType(ValueType type, bool withArticle)
return WA("a", "float");
case nThunk:
return WA("a", "thunk");
case nFailed:
return WA("a", "failure");
}
unreachable();
}
@@ -2060,6 +2064,54 @@ void ExprBlackHole::eval(EvalState & state, [[maybe_unused]] Env & env, Value &
// always force this to be separate, otherwise forceValue may inline it and take
// a massive perf hit
[[gnu::noinline]]
void EvalState::handleEvalExceptionForThunk(Env * env, Expr * expr, Value & v, const PosIdx pos)
{
v.mkThunk(env, expr);
tryFixupBlackHolePos(v, pos);
auto e = std::current_exception();
Value * recovery = nullptr;
try {
std::rethrow_exception(e);
} catch (const RecoverableEvalError & e) {
recovery = allocValue();
} catch (...) {
}
if (recovery) {
recovery->mkThunk(env, expr);
}
v.mkFailed(e, recovery);
}
[[gnu::noinline]]
void EvalState::handleEvalExceptionForApp(Value & v)
{
auto e = std::current_exception();
Value * recovery = nullptr;
try {
std::rethrow_exception(e);
} catch (const RecoverableEvalError & e) {
recovery = allocValue();
} catch (...) {
}
if (recovery) {
*recovery = v;
}
v.mkFailed(e, recovery);
}
[[gnu::noinline]]
void EvalState::handleEvalFailed(Value & v, const PosIdx pos)
{
assert(v.isFailed());
if (auto recoveryValue = v.failed()->recoveryValue) {
v = *recoveryValue;
forceValue(v, pos);
} else {
std::rethrow_exception(v.failed()->ex);
}
}
void EvalState::tryFixupBlackHolePos(Value & v, PosIdx pos)
{
if (!v.isBlackhole())
@@ -2068,7 +2120,8 @@ void EvalState::tryFixupBlackHolePos(Value & v, PosIdx pos)
try {
std::rethrow_exception(e);
} catch (InfiniteRecursionError & e) {
e.atPos(positions[pos]);
if (!e.hasPos())
e.atPos(positions[pos]);
} catch (...) {
}
}
@@ -2693,8 +2746,11 @@ void EvalState::assertEqValues(Value & v1, Value & v2, const PosIdx pos, std::st
}
return;
case nThunk: // Must not be left by forceValue
assert(false);
// Cannot be returned by forceValue().
case nThunk:
case nFailed:
unreachable();
default: // Note that we pass compiler flags that should make `default:` unreachable.
// Also note that this probably ran after `eqValues`, which implements
// the same logic more efficiently (without having to unwind stacks),
@@ -2786,8 +2842,11 @@ bool EvalState::eqValues(Value & v1, Value & v2, const PosIdx pos, std::string_v
// !!!
return v1.fpoint() == v2.fpoint();
case nThunk: // Must not be left by forceValue
assert(false);
// Cannot be returned by forceValue().
case nThunk:
case nFailed:
unreachable();
default: // Note that we pass compiler flags that should make `default:` unreachable.
error<EvalError>("eqValues: cannot compare %1% with %2%", showType(v1), showType(v2))
.withTrace(pos, errorCtx)

View File

@@ -56,6 +56,14 @@ MakeError(MissingArgumentError, EvalError);
MakeError(InfiniteRecursionError, EvalError);
MakeError(IFDError, EvalBaseError);
/**
* An evaluation error which should be retried instead of rethrown
*
* A RecoverableEvalError is not an EvalError, because we shouldn't cache it in the eval cache, as it should be retried
* anyway.
*/
MakeError(RecoverableEvalError, EvalBaseError);
struct InvalidPathError : public EvalError
{
public:

View File

@@ -5,6 +5,7 @@
#include "nix/expr/eval.hh"
#include "nix/expr/eval-error.hh"
#include "nix/expr/eval-settings.hh"
#include <exception>
namespace nix {
@@ -97,12 +98,19 @@ void EvalState::forceValue(Value & v, const PosIdx pos)
else
ExprBlackHole::throwInfiniteRecursionError(*this, v);
} catch (...) {
v.mkThunk(env, expr);
tryFixupBlackHolePos(v, pos);
handleEvalExceptionForThunk(env, expr, v, pos);
throw;
}
} else if (v.isApp())
callFunction(*v.app().left, *v.app().right, v, pos);
} else if (v.isApp()) {
try {
callFunction(*v.app().left, *v.app().right, v, pos);
} catch (...) {
handleEvalExceptionForApp(v);
throw;
}
} else if (v.isFailed()) {
handleEvalFailed(v, pos);
}
}
[[gnu::always_inline]]

View File

@@ -610,8 +610,28 @@ public:
*/
inline void forceValue(Value & v, const PosIdx pos);
private:
/**
* Internal support function for forceValue
*
* This code is factored out so that it's not in the heavily inlined hot path.
*/
void handleEvalExceptionForThunk(Env * env, Expr * expr, Value & v, const PosIdx pos);
/**
* Internal support function for forceValue
*
* This code is factored out so that it's not in the heavily inlined hot path.
*/
void handleEvalExceptionForApp(Value & v);
void handleEvalFailed(Value & v, PosIdx pos);
void tryFixupBlackHolePos(Value & v, PosIdx pos);
public:
/**
* Force a value, then recursively force list elements and
* attributes.

View File

@@ -2,6 +2,7 @@
///@file
#include <cassert>
#include <exception>
#include <span>
#include <type_traits>
#include <concepts>
@@ -35,6 +36,7 @@ typedef enum {
tBool,
tNull,
tFloat,
tFailed,
tExternal,
tPrimOp,
tAttrs,
@@ -57,6 +59,7 @@ typedef enum {
*/
typedef enum {
nThunk,
nFailed,
nInt,
nFloat,
nBool,
@@ -265,6 +268,30 @@ struct ValueBase
size_t size;
Value * const * elems;
};
/**
Representation of an evaluation that previously failed.
`Value` references `Failed` by packed pointer, and its `new` is GC managed.
@see gc_cleanup std::exception_ptr
*/
class Failed : public gc_cleanup
{
public:
std::exception_ptr ex;
/**
* Optional value for recovering `RecoverableEvalError`
* Must be set iff `ex` is an instance of `RecoverableEvalError`.
*/
Value * recoveryValue;
Failed(std::exception_ptr ex, Value * recoveryValue)
: ex(ex)
, recoveryValue(recoveryValue)
{
}
};
};
template<typename T>
@@ -291,6 +318,7 @@ struct PayloadTypeToInternalType
MACRO(PrimOp *, primOp, tPrimOp) \
MACRO(ValueBase::PrimOpApplicationThunk, primOpApp, tPrimOpApp) \
MACRO(ExternalValueBase *, external, tExternal) \
MACRO(ValueBase::Failed *, failed, tFailed) \
MACRO(NixFloat, fpoint, tFloat)
#define NIX_VALUE_PAYLOAD_TYPE(T, FIELD_NAME, DISCRIMINATOR) \
@@ -595,6 +623,11 @@ protected:
path.path = std::bit_cast<const char *>(payload[1]);
}
void getStorage(Failed *& failed) const noexcept
{
failed = std::bit_cast<Failed *>(payload[1]);
}
void setStorage(NixInt integer) noexcept
{
setSingleDWordPayload<tInt>(integer.value);
@@ -644,6 +677,11 @@ protected:
{
setUntaggablePayload<pdPath>(path.accessor, path.path);
}
void setStorage(Failed * failed) noexcept
{
setSingleDWordPayload<tFailed>(std::bit_cast<PackedPointer>(failed));
}
};
/**
@@ -866,12 +904,12 @@ public:
inline bool isThunk() const
{
return isa<tThunk>();
};
}
inline bool isApp() const
{
return isa<tApp>();
};
}
inline bool isBlackhole() const;
@@ -879,17 +917,22 @@ public:
inline bool isLambda() const
{
return isa<tLambda>();
};
}
inline bool isPrimOp() const
{
return isa<tPrimOp>();
};
}
inline bool isPrimOpApp() const
{
return isa<tPrimOpApp>();
};
}
inline bool isFailed() const
{
return isa<tFailed>();
}
/**
* Returns the normal type of a Value. This only returns nThunk if
@@ -926,6 +969,8 @@ public:
return nExternal;
case tFloat:
return nFloat;
case tFailed:
return nFailed;
case tThunk:
case tApp:
return nThunk;
@@ -1048,6 +1093,11 @@ public:
setStorage(n);
}
inline void mkFailed(std::exception_ptr e, Value * recovery) noexcept
{
setStorage(new Value::Failed(e, recovery));
}
bool isList() const noexcept
{
return isa<tListSmall, tListN>();
@@ -1151,6 +1201,11 @@ public:
{
return getStorage<Path>().accessor;
}
Failed * failed() const noexcept
{
return getStorage<Failed *>();
}
};
extern ExprBlackHole eBlackHole;

View File

@@ -515,6 +515,7 @@ static void prim_typeOf(EvalState & state, const PosIdx pos, Value ** args, Valu
v.mkStringNoCopy("float");
break;
case nThunk:
case nFailed:
unreachable();
}
}

View File

@@ -75,6 +75,9 @@ void printAmbiguous(
str << "«potential infinite recursion»";
}
break;
case nFailed:
str << "«failed»";
break;
case nFunction:
if (v.isLambda()) {
str << "<LAMBDA>";

View File

@@ -508,6 +508,11 @@ private:
}
}
void printFailed(Value & v)
{
output << "«failed»";
}
void printExternal(Value & v)
{
v.external()->print(output);
@@ -583,6 +588,10 @@ private:
printThunk(v);
break;
case nFailed:
printFailed(v);
break;
case nExternal:
printExternal(v);
break;

View File

@@ -96,6 +96,7 @@ json printValueAsJSON(
break;
case nThunk:
case nFailed:
case nFunction:
state.error<TypeError>("cannot convert %1% to JSON", showType(v)).atPos(v.determinePos(pos)).debugThrow();
}

View File

@@ -170,6 +170,11 @@ static void printValueAsXML(
case nThunk:
doc.writeEmptyElement("unevaluated");
break;
case nFailed:
doc.writeEmptyElement("failed");
break;
}
}

View File

@@ -98,6 +98,13 @@ enum nix_err {
*/
NIX_ERR_NIX_ERROR = -4,
/**
* @brief A recoverable error occurred.
*
* This is used primarily by C API *consumers* to communicate that a failed
* primop call should be retried on the next evaluation attempt.
*/
NIX_ERR_RECOVERABLE = -5,
};
typedef enum nix_err nix_err;

View File

@@ -51,6 +51,7 @@ struct LinesOfCode
FIXME: Untangle this mess. Should there be AbstractPos as there used to be before
4feb7d9f71? */
struct Pos;
bool isEmpty(const Pos & pos);
void printCodeLines(std::ostream & out, const std::string & prefix, const Pos & errPos, const LinesOfCode & loc);
@@ -187,6 +188,11 @@ public:
err.pos = pos;
}
bool hasPos()
{
return err.pos.get() && !isEmpty(*err.pos.get());
}
void pushTrace(Trace trace)
{
err.traces.push_front(trace);

View File

@@ -7,6 +7,11 @@ Pos::operator std::shared_ptr<const Pos>() const
return std::make_shared<const Pos>(*this);
}
bool isEmpty(const Pos & pos)
{
return !pos.operator bool();
}
std::optional<LinesOfCode> Pos::getCodeLines() const
{
if (line == 0)

View File

@@ -0,0 +1 @@
trace: throwing

View File

@@ -0,0 +1 @@
"done"

View File

@@ -0,0 +1,7 @@
# Since Nix 2.32, errors are memoized
let
# This attribute value will only be evaluated once.
foo = builtins.trace "throwing" throw "nope";
in
# Trigger and catch the error twice.
builtins.seq (builtins.tryEval foo).success builtins.seq (builtins.tryEval foo).success "done"