diff options
| author | Pol Dellaiera <pol.dellaiera@protonmail.com> | 2025-04-23 04:51:38 +0000 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2025-04-23 04:51:38 +0000 |
| commit | a9320986b3bce732b1e37bea028ded52ed3e7273 (patch) | |
| tree | 7a1836ec929084321711f094614efd6b97c089bc /lib/strings.nix | |
| parent | python312Packages.docling-serve: 0.7.0 -> 0.8.0 (#400972) (diff) | |
| parent | lib.strings: init splitStringBy (diff) | |
| download | nixpkgs-a9320986b3bce732b1e37bea028ded52ed3e7273.tar.gz | |
lib.strings: init splitStringBy (#385643)
Diffstat (limited to 'lib/strings.nix')
| -rw-r--r-- | lib/strings.nix | 91 |
1 files changed, 91 insertions, 0 deletions
/**
  Splits a string into substrings based on a predicate that examines adjacent characters.

  This function provides a flexible way to split strings by checking pairs of characters
  against a custom predicate function. Unlike simpler splitting functions, this allows
  for context-aware splitting based on character transitions and patterns.

  The predicate is called exactly once per position of the input, and the result is
  assembled without recursion, so the evaluation depth does not grow with the length
  of the input string.

  # Inputs

  `predicate`
  : Function that takes two arguments (previous character and current character)
    and returns true when the string should be split at the current position.
    For the first character, previous will be "" (empty string).

  `keepSplit`
  : Boolean that determines whether the splitting character should be kept as
    part of the result. If true, the character will be included at the beginning
    of the next substring; if false, it will be discarded.

  `str`
  : The input string to split.

  # Return

  A list of substrings from the original string, split according to the predicate.
  The list always contains at least one element; an empty input yields `[ "" ]`.

  # Type

  ```
  splitStringBy :: (string -> string -> bool) -> bool -> string -> [string]
  ```

  # Examples
  :::{.example}
  ## `lib.strings.splitStringBy` usage example

  Split on periods and hyphens, discarding the separators:
  ```nix
  splitStringBy (prev: curr: builtins.elem curr [ "." "-" ]) false "foo.bar-baz"
  => [ "foo" "bar" "baz" ]
  ```

  Split on transitions from lowercase to uppercase, keeping the uppercase characters:
  ```nix
  splitStringBy (prev: curr: builtins.match "[a-z]" prev != null && builtins.match "[A-Z]" curr != null) true "fooBarBaz"
  => [ "foo" "Bar" "Baz" ]
  ```

  Handle leading separators correctly:
  ```nix
  splitStringBy (prev: curr: builtins.elem curr [ "." ]) false ".foo.bar.baz"
  => [ "" "foo" "bar" "baz" ]
  ```

  Handle trailing separators correctly:
  ```nix
  splitStringBy (prev: curr: builtins.elem curr [ "." ]) false "foo.bar.baz."
  => [ "foo" "bar" "baz" "" ]
  ```
  :::
*/
splitStringBy =
  predicate: keepSplit: str:
  let
    len = stringLength str;

    # Single character at index `i` of the input.
    charAt = i: substring i 1 str;

    # Indices at which the predicate asks for a split. The predicate receives
    # the previous and the current character; index 0 has no predecessor and
    # gets "" instead.
    splitPositions = builtins.filter (
      i: predicate (if i > 0 then charAt (i - 1) else "") (charAt i)
    ) (builtins.genList (i: i) len);

    # N split positions always delimit N + 1 parts (some possibly empty).
    partCount = builtins.length splitPositions + 1;

    # Part 0 starts at the beginning of the string. Every later part starts at
    # the split position that opened it, or one past it when the splitting
    # character is discarded.
    partStart =
      k:
      if k == 0 then
        0
      else
        let
          splitAt = builtins.elemAt splitPositions (k - 1);
        in
        if keepSplit then splitAt else splitAt + 1;

    # A part ends (exclusively) at the next split position; the last part runs
    # to the end of the string.
    partEnd = k: if k == partCount - 1 then len else builtins.elemAt splitPositions k;

    # Slice every part directly out of the input instead of accumulating it
    # character by character.
    parts = builtins.genList (
      k: substring (partStart k) (partEnd k - partStart k) str
    ) partCount;
  in
  # Re-attach the string context of the input to every part, including empty ones.
  map (addContextFrom str) parts;
