summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--lib/default.nix1
-rw-r--r--lib/strings.nix91
-rw-r--r--lib/tests/misc.nix95
3 files changed, 187 insertions, 0 deletions
diff --git a/lib/default.nix b/lib/default.nix
index 19316addb8cb..c433ca6a3e09 100644
--- a/lib/default.nix
+++ b/lib/default.nix
@@ -347,6 +347,7 @@ let
toSentenceCase
addContextFrom
splitString
+ splitStringBy
removePrefix
removeSuffix
versionOlder
diff --git a/lib/strings.nix b/lib/strings.nix
index d281120cad7f..70f3a6cc8a25 100644
--- a/lib/strings.nix
+++ b/lib/strings.nix
@@ -1589,6 +1589,97 @@ rec {
map (addContextFrom s) splits;
/**
+ Splits a string into substrings based on a predicate that examines adjacent characters.
+
+ This function provides a flexible way to split strings by checking pairs of characters
+ against a custom predicate function. Unlike simpler splitting functions, this allows
+ for context-aware splitting based on character transitions and patterns.
+
+ # Inputs
+
+ `predicate`
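+ : Function that takes two arguments (the previous character and the current character)
+ and returns true when the string should be split at the current position.
+ For the first character, the previous character is "" (the empty string).
+ If the predicate returns true for the first character, the result starts with
+ an empty string.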
+
+ `keepSplit`
+ : Boolean that determines whether the character at which a split occurs (the
+ current character for which `predicate` returned true) is kept in the result.
+ If true, that character becomes the first character of the next substring;
+ if false, it is discarded.
+
+ `str`
+ : The input string to split.
+
+ # Return
+
+ A list of substrings from the original string, split according to the predicate.
+
+ # Type
+
+ ```
+ splitStringBy :: (string -> string -> bool) -> bool -> string -> [string]
+ ```
+
+ # Examples
+ :::{.example}
+ ## `lib.strings.splitStringBy` usage example
+
+ Split on periods and hyphens, discarding the separators:
+ ```nix
+ splitStringBy (prev: curr: builtins.elem curr [ "." "-" ]) false "foo.bar-baz"
+ => [ "foo" "bar" "baz" ]
+ ```
+
+ Split on transitions from lowercase to uppercase, keeping the uppercase characters:
+ ```nix
+ splitStringBy (prev: curr: builtins.match "[a-z]" prev != null && builtins.match "[A-Z]" curr != null) true "fooBarBaz"
+ => [ "foo" "Bar" "Baz" ]
+ ```
+
+ Handle leading separators correctly:
+ ```nix
+ splitStringBy (prev: curr: builtins.elem curr [ "." ]) false ".foo.bar.baz"
+ => [ "" "foo" "bar" "baz" ]
+ ```
+
+ Handle trailing separators correctly:
+ ```nix
+ splitStringBy (prev: curr: builtins.elem curr [ "." ]) false "foo.bar.baz."
+ => [ "foo" "bar" "baz" "" ]
+ ```
+ :::
+ */
+ splitStringBy =
+   predicate: keepSplit: str:
+   let
+     total = stringLength str;
+
+     # Single character of `str` at the given index.
+     charAt = idx: substring idx 1 str;
+
+     # Walks `str` one character at a time, from index 0 upwards. `pending` is
+     # the piece currently being assembled; `pieces` holds the completed ones.
+     walk =
+       idx: pending: pieces:
+       let
+         here = charAt idx;
+         # The first character has no predecessor, so the predicate sees "".
+         before = if idx > 0 then charAt (idx - 1) else "";
+       in
+       if idx == total then
+         # End of input: the piece under construction becomes the last one.
+         pieces ++ [ pending ]
+       else if predicate before here then
+         # Boundary: close the pending piece. The next piece starts with the
+         # boundary character only when `keepSplit` is true.
+         walk (idx + 1) (if keepSplit then here else "") (pieces ++ [ pending ])
+       else
+         # No boundary: the character extends the pending piece.
+         walk (idx + 1) (pending + here) pieces;
+   in
+   # An empty `str` needs no special case: `walk` immediately yields [ "" ].
+   map (addContextFrom str) (walk 0 "" [ ]);
+
+ /**
Return a string without the specified prefix, if the prefix matches.
# Inputs
diff --git a/lib/tests/misc.nix b/lib/tests/misc.nix
index f5f1fb5e7c2d..f9f2b0264b9e 100644
--- a/lib/tests/misc.nix
+++ b/lib/tests/misc.nix
@@ -631,6 +631,101 @@ runTests {
];
};
+ testSplitStringBySimpleDelimiter =
+   let
+     # Splits on a period or a hyphen; the previous character is not inspected.
+     isSeparator = _prev: curr: curr == "." || curr == "-";
+   in
+   {
+     expr = strings.splitStringBy isSeparator false "foo.bar-baz";
+     expected = [ "foo" "bar" "baz" ];
+   };
+
+ testSplitStringByLeadingDelimiter = {
+   # A separator in first position yields an empty leading piece.
+   expr = strings.splitStringBy (_prev: curr: curr == ".") false ".foo.bar.baz";
+   expected = [ "" "foo" "bar" "baz" ];
+ };
+
+ testSplitStringByTrailingDelimiter = {
+   # A separator in last position yields an empty trailing piece.
+   expr = strings.splitStringBy (_prev: curr: curr == ".") false "foo.bar.baz.";
+   expected = [ "foo" "bar" "baz" "" ];
+ };
+
+ testSplitStringByMultipleConsecutiveDelimiters = {
+   # Each separator closes a piece, so adjacent separators produce empty pieces.
+   expr = strings.splitStringBy (_prev: curr: curr == ".") false "foo...bar";
+   expected = [ "foo" "" "" "bar" ];
+ };
+
+ testSplitStringByKeepingSplitChar = {
+   # With keepSplit = true the separator starts the following piece.
+   expr = strings.splitStringBy (_prev: curr: curr == ".") true "foo.bar.baz";
+   expected = [ "foo" ".bar" ".baz" ];
+ };
+
+ testSplitStringByCaseTransition =
+   let
+     # True when the single character `char` matches the regex `pattern`.
+     matches = pattern: char: builtins.match pattern char != null;
+     # Boundary at every lowercase -> uppercase transition.
+     lowerToUpper = prev: curr: matches "[a-z]" prev && matches "[A-Z]" curr;
+   in
+   {
+     expr = strings.splitStringBy lowerToUpper true "fooBarBaz";
+     expected = [ "foo" "Bar" "Baz" ];
+   };
+
+ testSplitStringByEmptyString = {
+   # Empty input produces a single empty piece rather than an empty list.
+   expr = strings.splitStringBy (_prev: curr: curr == ".") false "";
+   expected = [ "" ];
+ };
+
+ testSplitStringByComplexPredicate =
+   let
+     # True when the single character `char` matches the regex `pattern`.
+     matches = pattern: char: builtins.match pattern char != null;
+     # Boundary only at a digit -> lowercase-letter transition, so "abc456"
+     # (letter -> digit) stays in one piece.
+     digitToLetter =
+       prev: curr: prev != "" && curr != "" && matches "[0-9]" prev && matches "[a-z]" curr;
+   in
+   {
+     expr = strings.splitStringBy digitToLetter true "123abc456def";
+     expected = [ "123" "abc456" "def" ];
+   };
+
+ testSplitStringByUpperCaseStart =
+   let
+     # Boundary at every uppercase character, including the very first one.
+     isUpper = _prev: curr: builtins.match "[A-Z]" curr != null;
+   in
+   {
+     # The split on the first character leaves an empty leading piece.
+     expr = strings.splitStringBy isUpper true "FooBarBaz";
+     expected = [ "" "Foo" "Bar" "Baz" ];
+   };
+
testEscapeShellArg = {
expr = strings.escapeShellArg "esc'ape\nme";
expected = "'esc'\\''ape\nme'";