Skip to content
Snippets Groups Projects
Commit ba244eb8 authored by Leo Unoki's avatar Leo Unoki
Browse files

Merge branch 'leo/tree-sitter' into 'dev'

tree sitter foreign function interface

See merge request !3238
parents bd782ec9 9a6d067f
No related branches found
No related tags found
1 merge request!3238tree sitter foreign function interface
Pipeline #1387865695 failed
...@@ -22,6 +22,7 @@ ...@@ -22,6 +22,7 @@
(using menhir 2.0) (using menhir 2.0)
(using coq 0.8) (using coq 0.8)
(using ctypes 0.3)
(package (package
(name ligo) (name ligo)
......
...@@ -59,7 +59,8 @@ ...@@ -59,7 +59,8 @@
]; ];
}; };
ligo = pkgs.callPackage ./nix/ligo.nix {inherit tezos-ligo;}; tree-sitter-typescript = pkgs.callPackage ./nix/tree-sitter-typescript.nix {};
ligo = pkgs.callPackage ./nix/ligo.nix {inherit tezos-ligo tree-sitter-typescript;};
ligo-syntaxes = ./tools/vscode/syntaxes; ligo-syntaxes = ./tools/vscode/syntaxes;
ligo-webide = pkgs.callPackage ./nix/webide.nix {inherit ligo-syntaxes;}; ligo-webide = pkgs.callPackage ./nix/webide.nix {inherit ligo-syntaxes;};
ligo-debugger = pkgs.callPackage ./nix/debugger.nix {}; ligo-debugger = pkgs.callPackage ./nix/debugger.nix {};
...@@ -128,6 +129,8 @@ ...@@ -128,6 +129,8 @@
shellHook = '' shellHook = ''
# This is a hack to work around the hack used in the dune files # This is a hack to work around the hack used in the dune files
export OPAM_SWITCH_PREFIX="${ligo.OPAM_SWITCH_PREFIX}"; export OPAM_SWITCH_PREFIX="${ligo.OPAM_SWITCH_PREFIX}";
export TREE_SITTER="${ligo.TREE_SITTER}";
export TREE_SITTER_TYPESCRIPT="${ligo.TREE_SITTER_TYPESCRIPT}";
''; '';
}; };
......
open Ctypes
open Api_types
(* functions api defined in tree-sitter *)
(* check: https://github.com/tree-sitter/tree-sitter/blob/master/lib/include/tree_sitter/api.h *)
module Functions (S : FOREIGN) = struct
open S
(************************)
(*** Section - Parser ***)
(************************)
(* Create a new parser. *)
let ts_parser_new = foreign "ts_parser_new" (void @-> returning @@ ptr ts_parser)
(* Delete the parser, freeing all of the memory that it used. *)
let ts_parser_delete = foreign "ts_parser_delete" (ptr ts_parser @-> returning void)
(*
* Returns a boolean indicating whether or not the language was successfully
* assigned. True means assignment succeeded. False means there was a version
* mismatch: the language was generated with an incompatible version of the
* Tree-sitter CLI. Check the language's version using [`ts_language_version`]
* and compare it to this library's [`TREE_SITTER_LANGUAGE_VERSION`] and
* [`TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION`] constants.
*)
let ts_parser_set_language =
foreign "ts_parser_set_language" (ptr ts_parser @-> ptr ts_language @-> returning bool)
(*
* Use the parser to parse some source code stored in one contiguous buffer.
* The first two parameters are the same as in the [`ts_parser_parse`] function
* above. The second two parameters indicate the location of the buffer and its
* length in bytes.
*)
let ts_parser_parse_string =
foreign
"ts_parser_parse_string"
(ptr ts_parser @-> ptr ts_tree @-> string @-> uint32_t @-> returning @@ ptr ts_tree)
(************************)
(**** Section - Tree ****)
(************************)
(*
* Create a shallow copy of the syntax tree. This is very fast.
*
* You need to copy a syntax tree in order to use it on more than one thread at
* a time, as syntax trees are not thread safe.
*)
let ts_tree_copy = foreign "ts_tree_copy" (ptr ts_tree @-> returning @@ ptr ts_tree)
(* Delete the syntax tree, freeing all of the memory that it used. *)
let ts_tree_delete = foreign "ts_tree_delete" (ptr ts_tree @-> returning void)
(* Get the root node of the syntax tree. *)
let ts_tree_root_node = foreign "ts_tree_root_node" (ptr ts_tree @-> returning ts_node)
(************************)
(**** Section - Node ****)
(************************)
(* Get the node's type as a null-terminated string. *)
let ts_node_type = foreign "ts_node_type" (ts_node @-> returning @@ ptr char)
(*
* Get an S-expression representing the node as a string.
* This string is allocated with `malloc` and the caller is responsible for
* freeing it using `free`.
*)
let ts_node_string = foreign "ts_node_string" (ts_node @-> returning @@ ptr char)
(* Get the node's number of children. *)
let ts_node_child_count = foreign "ts_node_child_count" (ts_node @-> returning uint32_t)
(* Get the node's *named* child at the given index. *)
let ts_node_named_child =
foreign "ts_node_named_child" (ts_node @-> uint32_t @-> returning ts_node)
(* Get the node's number of *named* children. *)
let ts_node_named_child_count =
foreign "ts_node_named_child_count" (ts_node @-> returning uint32_t)
(* Get the node's next / previous sibling. *)
let ts_node_next_sibling = foreign "ts_node_next_sibling" (ts_node @-> returning ts_node)
let ts_node_prev_sibling = foreign "ts_node_prev_sibling" (ts_node @-> returning ts_node)
(* Get the node's next / previous *named* sibling. *)
let ts_node_next_named_sibling =
foreign "ts_node_next_named_sibling" (ts_node @-> returning ts_node)
let ts_node_prev_named_sibling =
foreign "ts_node_prev_named_sibling" (ts_node @-> returning ts_node)
end
open Ctypes
(* structs defined in tree-sitter *)
(* check: https://github.com/tree-sitter/tree-sitter/blob/master/lib/include/tree_sitter/api.h *)
module Types (S : Ctypes.TYPE) = struct
open S
(* ////////// Types ////////// *)
let ts_symbol = uint16_t
let ts_state_id = uint16_t
let ts_field_id = uint16_t
(* ////////// struct TSPoint ////////// *)
type ts_point
let ts_point : ts_point structure typ = structure "TSPoint"
let row = field ts_point "row" uint
let column = field ts_point "column" uint
let () = seal ts_point
(* ////////// struct TSRange ////////// *)
type ts_range
let ts_range : ts_range structure typ = structure "TSRange"
let start_point = field ts_range "start_point" ts_point
let end_point = field ts_range "end_point" ts_point
let start_byte = field ts_range "start_byte" uint32_t
let end_byte = field ts_range "end_byte" uint32_t
let () = seal ts_range
(* ////////// struct TSLanguage ////////// *)
type ts_language
let ts_language : ts_language structure typ = structure "TSLanguage"
(* ////////// struct TSTree ////////// *)
type ts_tree
let ts_tree : ts_tree structure typ = structure "TSTree"
(* ////////// struct TSParser ////////// *)
type ts_parser
let ts_parser : ts_parser structure typ = structure "TSParser"
(* ////////// struct TSNode ////////// *)
type ts_node
let ts_node : ts_tree structure typ = structure "TSNode"
let context = field ts_node "context" (array 4 uint32_t)
let id = field ts_node "id" (ptr void)
let tree = field ts_node "tree" (ptr ts_tree)
let () = seal ts_node
end
(library
(name tree_sitter)
(libraries ctypes ctypes-foreign)
(ctypes
(external_library_name tree-sitter)
(build_flags_resolver
(vendored
(c_flags
:standard
-I%{env:TREE_SITTER=}/include
-Wno-incompatible-pointer-types-discards-qualifiers)
(c_library_flags :standard -ltree-sitter -L%{env:TREE_SITTER=}/lib)))
(headers
(include tree_sitter/api.h))
(type_description
(instance Types)
(functor Api_types_desc))
(function_description
(concurrency unlocked)
(instance Functions)
(functor Api_funcs_desc))
(generated_types Api_types)
(generated_entry_point Api)))
(executable
(name example)
(libraries tree_sitter tree_sitter_typescript)
(modules example)
)
\ No newline at end of file
open Tree_sitter.Api
open Types
open Functions
open Tree_sitter_typescript.Api
open Functions
open Ctypes
open Unsigned
(* converts from c string of type `char *` to ocaml string of type `string` *)
let string_of_ptr_char ptr =
let rec get_length p = if !@p = '\000' then 0 else 1 + get_length (p +@ 1) in
let length = get_length ptr in
let buffer = Bytes.create length in
for i = 0 to length - 1 do
Bytes.set buffer i !@(ptr +@ i)
done;
Bytes.to_string buffer
let print_node node =
let ptr_char = ts_node_string node in
Printf.printf "\n\n TypeScript CST : %s\n\n" @@ string_of_ptr_char ptr_char
let ts_node_type_string node = string_of_ptr_char @@ ts_node_type node
let parse_typescript_string source_code =
let parser = ts_parser_new () in
let language = tree_sitter_typescript () in
let _ = ts_parser_set_language parser language in
let null_tree = from_voidp ts_tree null in
let tree =
ts_parser_parse_string
parser
null_tree
source_code
(UInt32.of_int @@ String.length source_code)
in
ts_parser_delete parser;
tree
let () =
let code = "[1, null]" in
let tree = parse_typescript_string code in
(* extract the node *)
let root_node = ts_tree_root_node tree in
let expr_stmt_node = ts_node_named_child root_node (UInt32.of_int 0) in
let array_node = ts_node_named_child expr_stmt_node (UInt32.of_int 0) in
let number_node = ts_node_named_child array_node (UInt32.of_int 0) in
let null_node = ts_node_named_child array_node (UInt32.of_int 1) in
(* get the node type *)
assert (ts_node_type_string root_node = "program");
assert (ts_node_type_string expr_stmt_node = "expression_statement");
assert (ts_node_type_string array_node = "array");
assert (ts_node_type_string number_node = "number");
assert (ts_node_type_string null_node = "null");
(* get the child count *)
assert (ts_node_child_count root_node = UInt32.of_int 1);
assert (ts_node_child_count array_node = UInt32.of_int 5);
assert (ts_node_child_count number_node = UInt32.of_int 0);
print_node root_node;
ts_tree_delete tree
open Ctypes
open Tree_sitter.Api_types
module Functions (S: FOREIGN) = struct
open S
let tree_sitter_typescript = foreign "tree_sitter_typescript" (void @-> returning (ptr ts_language))
end
\ No newline at end of file
module Types (S: Ctypes.TYPE) = struct end
(library
(name tree_sitter_typescript)
(libraries ctypes ctypes-foreign tree_sitter)
(ctypes
(external_library_name tree-sitter-typescript)
(build_flags_resolver
(vendored
(c_flags :standard -I%{env:TREE_SITTER_TYPESCRIPT=}/include)
(c_library_flags
:standard
-L%{env:TREE_SITTER=}/lib
-ltree-sitter
-L%{env:TREE_SITTER_TYPESCRIPT=}/lib
-ltree-sitter-typescript)))
(headers
(include tree_sitter/tree-sitter-typescript.h))
(type_description
(instance Types)
(functor Api_types_desc))
(function_description
(concurrency unlocked)
(instance Functions)
(functor Api_funcs_desc))
(generated_types Api_types)
(generated_entry_point Api)))
...@@ -3,6 +3,8 @@ ...@@ -3,6 +3,8 @@
lib, lib,
pkgs, pkgs,
tezos-ligo, tezos-ligo,
tree-sitter,
tree-sitter-typescript
}: let }: let
inherit (pkgs) darwin ocamlPackages python3Packages coq_8_13 tezos-rust-libs; inherit (pkgs) darwin ocamlPackages python3Packages coq_8_13 tezos-rust-libs;
in in
...@@ -13,6 +15,8 @@ in ...@@ -13,6 +15,8 @@ in
src = ./..; src = ./..;
OPAM_SWITCH_PREFIX = "${tezos-rust-libs}"; OPAM_SWITCH_PREFIX = "${tezos-rust-libs}";
TREE_SITTER = "${tree-sitter}";
TREE_SITTER_TYPESCRIPT = "${tree-sitter-typescript}";
postPatch = '' postPatch = ''
mkdir -p vendors/tezos-ligo mkdir -p vendors/tezos-ligo
...@@ -26,6 +30,8 @@ in ...@@ -26,6 +30,8 @@ in
crunch crunch
odoc odoc
python3Packages.jsonschema python3Packages.jsonschema
tree-sitter
tree-sitter-typescript
]; ];
propagatedBuildInputs = propagatedBuildInputs =
...@@ -97,6 +103,8 @@ in ...@@ -97,6 +103,8 @@ in
alcotest # with-test alcotest # with-test
ppx_expect # with-test ppx_expect # with-test
ppx_inline_test # with-test ppx_inline_test # with-test
ctypes
ctypes-foreign
] ]
++ lib.optionals stdenv.isDarwin [ ++ lib.optionals stdenv.isDarwin [
darwin.apple_sdk.frameworks.Security darwin.apple_sdk.frameworks.Security
......
{
stdenv
, lib
, fetchFromGitHub
, tree-sitter
}:
let
version = "0.22.5";
src = fetchFromGitHub {
owner = "tree-sitter";
repo = "tree-sitter-typescript";
rev = "198d03553f43a45b92ac5d0ee167db3fec6a6fd6";
hash = "sha256-U597+o8gakd4nU9H2FE2aVhGqSG/eRh6BUhtEmwMzrU=";
};
in
stdenv.mkDerivation {
pname = "tree-sitter-typescript";
inherit src version;
nativeBuildInputs = [ tree-sitter ];
configurePhase = ''
cd typescript
'';
makeFlags = [ "PREFIX=${placeholder "out"}" ];
}
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment