(* The entry point *)
(* [go content] splits [content] into lines with [split_lines], runs [start]
   over them with a freshly built [env] record, and then returns [env.result]
   in reverse order with the head of the reversed list removed. *)
let rec go content =
let env = { file = None; modified = []; line = 0; result = [] } in
let lines = split_lines content in
(* [start] evaluates to unit, so its outcome is read back out of [env] below;
   presumably [header]/[modified] update [env] in place -- not visible here. *)
start env lines;
match List.rev env.result with
| [] -> []
(* NOTE(review): the first element of the reversed list is discarded; the
   reason is not visible in this excerpt -- confirm against [header] and
   [modified]. *)
| _ :: results -> results
(* Skip the text before the first +++ (to make things work with git show) *)
(* [start env lines] walks [lines] until it meets a line longer than four
   characters whose first three characters are "+++". It then calls
   [header env line] and continues with [modified env 0] on the remaining
   lines. If no such line exists it returns unit without calling either. *)
and start env = function
| [] -> ()
| line :: lines
(* The length test is evaluated first, so [String.sub line 0 3] is never
   applied to a line shorter than three characters. *)
when String.length line > 4 && String.sub line 0 3 = "+++" ->
header env line;
modified env 0 lines
| _ :: lines -> start env lines
It's beautiful to me because it's very concise looking, while still using a fair bit of real English, meaningful symbols, and idioms like [] making an empty array or whatever. I have to admit, much of this is lost on me, but it does feel more accessible than other FP languages. It almost has a Python feel. [0] https://github.com/facebook/flow/blob/master/hack/parsing/fo...
env.lines = [line for line in env.lines if not line.startswith("+++")]
And just look at this: when String.length line > 4 && String.sub line 0 3 = "+++"
Really? Specifying numbers like that in the code? And are we really going to alloc an object here? Wow. Wouldn't anyone prefer to use something like: line.startswith("+++") ?
edit: my code doesn't implement the exact same functionality. I've only attempted to give a feel/look of the code, not rewrite it. It looks like the original code returns the lines after the '+++' lines, but I cannot tell for sure, as the header/modified function implementations are not given.
OCaml is a language with currying and higher order functions; all that power is wasted if instead of composition you `.Objected` into everything. The advantages of piping, currying and composition on first class functions simply cannot be overstated.
Okay, so the next thing, from String.sub line 0 3 it's clear that you need to know the string length, hence the > 4 can't be avoided. You could pack it away into a function but why waste time if you're not going to write this 3 or more times?
The function `start` takes an env and a string list, and returns unit. It's a procedure. The function looks for a string that starts with "+++" and either fails to find one and returns (), or else passes to `modified` (also a procedure, tail recursive), which goes through the list (line :: lines gives the head and the tail of the list) applying modifications to env by tracking which lines were modified. `Lines` is an integer list and `result` is an integer pair list, so hopefully it's clearer now why your code is doing something completely different.
---
env.lines = [line for line in env.lines if not line.startswith("+++")]
--- I don't think that's what it's actually doing. I think it's actually returning all the lines after the first line that starts with "+++". Or maybe all those lines but the first.
(I don't think its a good example of clear FP code, either.)
dropWhile (not . T.isPrefixOf "+++") >>> \case
[] -> return ()
(line:lines) -> do
header env line
modified env 0 lines
edited to add: code above assumes LambdaCase and OverloadedStrings extensions, and that you've imported Data.Text qualified as T.
Actually, from the line
| [] -> ()
we know that the function `start` returns nothing (well, returns a special value 'unit' which is OCaml's way to say nothing), so also the functions `header` and `modified` inside must return nothing, so they are called for their side effects only.
Which is cool, but start is a (recursive) helper function for the go function (and if you look at the source file, there are a number more that are defined along with it, this is kind of a weird excerpt to just show the main go function and the first of several helpers), which is the main public interface being defined in the code presented. I wasn't trying to describe what it looked like start did when called, but what it seemed like go was trying to do.
(And, I was doing so looking just at the excerpt and going a bit off the cuff; its clearly, looking at the source file, doing a lot more.)
start env = function
| [] -> ()
| line :: lines
when String.length line > 4 && String.sub line 0 3 = "+++" ->
header env line;
modified env 0 lines
| _ :: lines -> start env lines
is a bit of an elitist way to write a 2-argument function as a combination of a 1-argument function and a lambda function. So let me rewrite:
start (env) (list) = match list with
| [] -> ()
| line :: lines
when String.length line > 4 && String.sub line 0 3 = "+++" ->
header env line;
modified env 0 lines
| _ :: lines -> start env lines
So the function `start` takes some kind of state `env` as a first argument, and then it takes a list as a second argument, here named `list`.
If the list is empty, [], it does nothing.
If the first list element, which is a string, starts with "+++", it calls two other functions: first header(env,line) where `line` is now the first list element, and then modified(env,0,lines) where `lines` is the rest of the list, not containing the first element.
If the first list element does not start with +++, it recurses to the rest of the list.
So this is not a filter: After first encounter of "+++" it calls those two other functions and stops.
def start(env, lines):
    """Python rendering of the OCaml `start` helper.

    Skips lines until the first one that is longer than four characters and
    begins with "+++", passes that line to `header`, passes the remaining
    lines to `modified`, and then stops. If no such line exists, nothing is
    called.
    """
    while lines:
        # Split off the head; `lines` becomes the rest of the list.
        line, *lines = lines
        if len(line) > 4 and line.startswith("+++"):
            header(env, line)
            modified(env, 0, lines)
            # The OCaml version does not recurse after the hand-off, so stop
            # here instead of scanning on for further "+++" lines.
            return
Notes:
while lines:
is pythonic for while not lines == []:
when String.length line > 4 && String.sub line 0 3 = "+++"
Specifying numbers like that in the code? Allocating an object? Wow. I don't understand why anyone wouldn't prefer to use something like: line.startswith("+++")
let rec f (farg1) =
...
and g (garg1) =
...
is used to define mutually recursive functions f and g.
Then again, in the given code the function `start`, while calling itself, doesn't call the other function `go`, so they are not really mutually recursive. `go` just calls `start`, but actually `go` is not recursive at all.
So they could have written:
let rec start (env) =
...
and then let go (content) =
...
In this case, `start` needs to be defined first in the source code, so `go` can call it. Maybe the author wanted to write `start` below the definition of `go` (kind of Haskell style) and kind of abused the mutual recursion form for stylistic reasons?
So you can say
let process data =
let data = sanitize data in
f(data)
instead of
let process data0 =
let data1 = sanitize data0 in
f(data1)
And because OCaml uses the same let syntax for variables and functions, this also lets you write
let f x = x * x
let f x = f (f x)
# now f(3) will be 81
So we can define new f using the definition of old f.
In some cases I can see the point of overwriting variables, instead of using new names like data0, data1, data2, but in the case of functions, redefining functions while utilizing the previous definition in the body of the new definition will probably not catch on.
But anyway, because this is possible, OCaml needs another way to signify that the f used in the definition of f is not meant to refer to a previous definition, but the current one.
Hence, let rec.
Also, while marking a single function recursive maybe does not make much difference when reading source code, it's kind of nice to read code when a group of mutually recursive functions is explicitly marked as such.
Additionally, a recursive binding (behind the scenes) is semantically much more complex as it's the effect of having a fixed-point operator called on a higher-order function. It's nice to have to make a specific syntactic call out to have this happen.
go content =
let env = Env { file = Nothing, modified = [], line = 0, result = 0 }
lines = split_lines content
env' = start env lines
in case reverse (result env') of
[] -> []
_ : results -> results
start env x = case x of
[] -> return ()
line : lines
| length line > 4 && take 3 line == "+++" ->
do header env line
modified env 0 lines
| otherwise ->
start env lines start env x = case x of
[] -> env
line : lines
| length line > 4 && take 3 line == "+++" ->
modified (header env line) 0 lines
| otherwise ->
start env lines
This reflects the pure functional style over the state monad mechanism I half implemented above. OCaml is just using normal mutability.
start env [] = env
start env (line:lines)
| length line > 4 && take 3 line == "+++" =
modified (header env line) 0 lines
| otherwise =
start env lines
?or even
start env [] = env
start env (line@('+':'+':'+':_:_):lines) =
modified (header env line) 0 lines
start env (_:lines) =
start env lines
?
Then ;; is used to separate statements at the top level. But in most of the cases when a new top level statement begins, OCaml can understand this anyway and you can omit the ;;. Or use the ;; if you prefer that style.
So, imperative code is as full of ;'s as C code, and toplevel use of ;; is a stylistic issue.
https://ocaml.org/learn/tutorials/structure_of_ocaml_program...
let rec go content =
may be problematic for an outsider who doesn't know that let and rec are keywords.
It means let recursive function go(content) = ...