From: www Date: Sun, 29 Sep 2024 21:31:10 +0000 (+0000) Subject: Mirrored from /srv/git/yakumo.izuru/text-parse-ml.git X-Git-Url: https://git.chaotic.ninja/gitweb/yakumo_izuru/?a=commitdiff_plain;h=HEAD;p=text_parse.git Mirrored from /srv/git/yakumo.izuru/text-parse-ml.git git-svn-id: https://svn.chaotic.ninja/svn/text_parse-yakumo.izuru@1 3ed60d3f-a7b0-8c4c-b4e0-20cdec842ae6 --- 10c2702fac38c4509b445f3e9c71c059c3de4635 diff --git a/branches/master/.gitignore b/branches/master/.gitignore new file mode 100644 index 0000000..b6116b1 --- /dev/null +++ b/branches/master/.gitignore @@ -0,0 +1,3 @@ +_build +*.txt +*.merlin diff --git a/branches/master/LICENSE b/branches/master/LICENSE new file mode 100644 index 0000000..fa3348e --- /dev/null +++ b/branches/master/LICENSE @@ -0,0 +1,153 @@ +EUROPEAN UNION PUBLIC LICENCE v. 1.2 +EUPL © the European Union 2007, 2016 + +This European Union Public Licence (the ‘EUPL’) applies to the Work (as defined below) which is provided under the terms of this Licence. Any use of the Work, other than as authorised under this Licence is prohibited (to the extent such use is covered by a right of the copyright holder of the Work). + +The Work is provided under the terms of this Licence when the Licensor (as defined below) has placed the following notice immediately following the copyright notice for the Work: + +Licensed under the EUPL + +or has expressed by any other means his willingness to license under the EUPL. + +1. Definitions +In this Licence, the following terms have the following meaning: + +— ‘The Licence’: this Licence. + +— ‘The Original Work’: the work or software distributed or communicated by the Licensor under this Licence, available as Source Code and also as Executable Code as the case may be. + +— ‘Derivative Works’: the works or software that could be created by the Licensee, based upon the Original Work or modifications thereof. This Licence does not define the extent of modification or dependence on the Original Work required in order to classify a work as a Derivative Work; this extent is determined by copyright law applicable in the country mentioned in Article 15. + +— ‘The Work’: the Original Work or its Derivative Works. + +— ‘The Source Code’: the human-readable form of the Work which is the most convenient for people to study and modify. + +— ‘The Executable Code’: any code which has generally been compiled and which is meant to be interpreted by a computer as a program. + +— ‘The Licensor’: the natural or legal person that distributes or communicates the Work under the Licence. + +— ‘Contributor(s)’: any natural or legal person who modifies the Work under the Licence, or otherwise contributes to the creation of a Derivative Work. + +— ‘The Licensee’ or ‘You’: any natural or legal person who makes any usage of the Work under the terms of the Licence. + +— ‘Distribution’ or ‘Communication’: any act of selling, giving, lending, renting, distributing, communicating, transmitting, or otherwise making available, online or offline, copies of the Work or providing access to its essential functionalities at the disposal of any other natural or legal person. + +2. Scope of the rights granted by the Licence +The Licensor hereby grants You a worldwide, royalty-free, non-exclusive, sublicensable licence to do the following, for the duration of copyright vested in the Original Work: + +— use the Work in any circumstance and for all usage, + +— reproduce the Work, + +— modify the Work, and make Derivative Works based upon the Work, + +— communicate to the public, including the right to make available or display the Work or copies thereof to the public and perform publicly, as the case may be, the Work, + +— distribute the Work or copies thereof, + +— lend and rent the Work or copies thereof, + +— sublicense rights in the Work or copies thereof. + +Those rights can be exercised on any media, supports and formats, whether now known or later invented, as far as the applicable law permits so. + +In the countries where moral rights apply, the Licensor waives his right to exercise his moral right to the extent allowed by law in order to make effective the licence of the economic rights here above listed. + +The Licensor grants to the Licensee royalty-free, non-exclusive usage rights to any patents held by the Licensor, to the extent necessary to make use of the rights granted on the Work under this Licence. + +3. Communication of the Source Code +The Licensor may provide the Work either in its Source Code form, or as Executable Code. If the Work is provided as Executable Code, the Licensor provides in addition a machine-readable copy of the Source Code of the Work along with each copy of the Work that the Licensor distributes or indicates, in a notice following the copyright notice attached to the Work, a repository where the Source Code is easily and freely accessible for as long as the Licensor continues to distribute or communicate the Work. + +4. Limitations on copyright +Nothing in this Licence is intended to deprive the Licensee of the benefits from any exception or limitation to the exclusive rights of the rights owners in the Work, of the exhaustion of those rights or of other applicable limitations thereto. + +5. Obligations of the Licensee +The grant of the rights mentioned above is subject to some restrictions and obligations imposed on the Licensee. Those obligations are the following: + +Attribution right: The Licensee shall keep intact all copyright, patent or trademarks notices and all notices that refer to the Licence and to the disclaimer of warranties. The Licensee must include a copy of such notices and a copy of the Licence with every copy of the Work he/she distributes or communicates. The Licensee must cause any Derivative Work to carry prominent notices stating that the Work has been modified and the date of modification. +Copyleft clause: If the Licensee distributes or communicates copies of the Original Works or Derivative Works, this Distribution or Communication will be done under the terms of this Licence or of a later version of this Licence unless the Original Work is expressly distributed only under this version of the Licence — for example by communicating ‘EUPL v. 1.2 only’. The Licensee (becoming Licensor) cannot offer or impose any additional terms or conditions on the Work or Derivative Work that alter or restrict the terms of the Licence. +Compatibility clause: If the Licensee Distributes or Communicates Derivative Works or copies thereof based upon both the Work and another work licensed under a Compatible Licence, this Distribution or Communication can be done under the terms of this Compatible Licence. For the sake of this clause, ‘Compatible Licence’ refers to the licences listed in the appendix attached to this Licence. Should the Licensee's obligations under the Compatible Licence conflict with his/her obligations under this Licence, the obligations of the Compatible Licence shall prevail. +Provision of Source Code: When distributing or communicating copies of the Work, the Licensee will provide a machine-readable copy of the Source Code or indicate a repository where this Source will be easily and freely available for as long as the Licensee continues to distribute or communicate the Work. +Legal Protection: This Licence does not grant permission to use the trade names, trademarks, service marks, or names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the copyright notice. +6. Chain of Authorship +The original Licensor warrants that the copyright in the Original Work granted hereunder is owned by him/her or licensed to him/her and that he/she has the power and authority to grant the Licence. + +Each Contributor warrants that the copyright in the modifications he/she brings to the Work are owned by him/her or licensed to him/her and that he/she has the power and authority to grant the Licence. + +Each time You accept the Licence, the original Licensor and subsequent Contributors grant You a licence to their contri butions to the Work, under the terms of this Licence. + +7. Disclaimer of Warranty +The Work is a work in progress, which is continuously improved by numerous Contributors. It is not a finished work and may therefore contain defects or ‘bugs’ inherent to this type of development. + +For the above reason, the Work is provided under the Licence on an ‘as is’ basis and without warranties of any kind concerning the Work, including without limitation merchantability, fitness for a particular purpose, absence of defects or errors, accuracy, non-infringement of intellectual property rights other than copyright as stated in Article 6 of this Licence. + +This disclaimer of warranty is an essential part of the Licence and a condition for the grant of any rights to the Work. + +8. Disclaimer of Liability +Except in the cases of wilful misconduct or damages directly caused to natural persons, the Licensor will in no event be liable for any direct or indirect, material or moral, damages of any kind, arising out of the Licence or of the use of the Work, including without limitation, damages for loss of goodwill, work stoppage, computer failure or malfunction, loss of data or any commercial damage, even if the Licensor has been advised of the possibility of such damage. However, the Licensor will be liable under statutory product liability laws as far such laws apply to the Work. + +9. Additional agreements +While distributing the Work, You may choose to conclude an additional agreement, defining obligations or services consistent with this Licence. However, if accepting obligations, You may act only on your own behalf and on your sole responsibility, not on behalf of the original Licensor or any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against such Contributor by the fact You have accepted any warranty or additional liability. + +10. Acceptance of the Licence +The provisions of this Licence can be accepted by clicking on an icon ‘I agree’ placed under the bottom of a window displaying the text of this Licence or by affirming consent in any other similar way, in accordance with the rules of applicable law. Clicking on that icon indicates your clear and irrevocable acceptance of this Licence and all of its terms and conditions. + +Similarly, you irrevocably accept this Licence and all of its terms and conditions by exercising any rights granted to You by Article 2 of this Licence, such as the use of the Work, the creation by You of a Derivative Work or the Distribution or Communication by You of the Work or copies thereof. + +11. Information to the public +In case of any Distribution or Communication of the Work by means of electronic communication by You (for example, by offering to download the Work from a remote location) the distribution channel or media (for example, a website) must at least provide to the public the information requested by the applicable law regarding the Licensor, the Licence and the way it may be accessible, concluded, stored and reproduced by the Licensee. + +12. Termination of the Licence +The Licence and the rights granted hereunder will terminate automatically upon any breach by the Licensee of the terms of the Licence. + +Such a termination will not terminate the licences of any person who has received the Work from the Licensee under the Licence, provided such persons remain in full compliance with the Licence. + +13. Miscellaneous +Without prejudice of Article 9 above, the Licence represents the complete agreement between the Parties as to the Work. + +If any provision of the Licence is invalid or unenforceable under applicable law, this will not affect the validity or enforceability of the Licence as a whole. Such provision will be construed or reformed so as necessary to make it valid and enforceable. + +The European Commission may publish other linguistic versions or new versions of this Licence or updated versions of the Appendix, so far this is required and reasonable, without reducing the scope of the rights granted by the Licence. New versions of the Licence will be published with a unique version number. + +All linguistic versions of this Licence, approved by the European Commission, have identical value. Parties can take advantage of the linguistic version of their choice. + +14. Jurisdiction +Without prejudice to specific agreement between parties, + +— any litigation resulting from the interpretation of this License, arising between the European Union institutions, bodies, offices or agencies, as a Licensor, and any Licensee, will be subject to the jurisdiction of the Court of Justice of the European Union, as laid down in article 272 of the Treaty on the Functioning of the European Union, + +— any litigation arising between other parties and resulting from the interpretation of this License, will be subject to the exclusive jurisdiction of the competent court where the Licensor resides or conducts its primary business. + +15. Applicable Law +Without prejudice to specific agreement between parties, + +— this Licence shall be governed by the law of the European Union Member State where the Licensor has his seat, resides or has his registered office, + +— this licence shall be governed by Belgian law if the Licensor has no seat, residence or registered office inside a European Union Member State. + +Appendix +‘Compatible Licences’ according to Article 5 EUPL are: + +— GNU General Public License (GPL) v. 2, v. 3 + +— GNU Affero General Public License (AGPL) v. 3 + +— Open Software License (OSL) v. 2.1, v. 3.0 + +— Eclipse Public License (EPL) v. 1.0 + +— CeCILL v. 2.0, v. 2.1 + +— Mozilla Public Licence (MPL) v. 2 + +— GNU Lesser General Public Licence (LGPL) v. 2.1, v. 3 + +— Creative Commons Attribution-ShareAlike v. 3.0 Unported (CC BY-SA 3.0) for works other than software + +— European Union Public Licence (EUPL) v. 1.1, v. 1.2 + +— Québec Free and Open-Source Licence — Reciprocity (LiLiQ-R) or Strong Reciprocity (LiLiQ-R+) + +The European Commission may update this Appendix to later versions of the above licences without producing a new version of the EUPL, as long as they provide the rights granted in Article 2 of this Licence and protect the covered Source Code from exclusive appropriation. + +All other changes or additions to this Appendix require the production of a new EUPL version. diff --git a/branches/master/bin/cli.ml b/branches/master/bin/cli.ml new file mode 100644 index 0000000..653d202 --- /dev/null +++ b/branches/master/bin/cli.ml @@ -0,0 +1,23 @@ +module Test = struct + type t = unit + let blank_line () = print_string "{bl}" + let angled_uri s () = print_string ("{>}" ^ s ^ "{<}") + let plain_text s () = print_string s + let heading_hashbang i s () = print_string (string_of_int i ^ s) + let paragraph_s () = print_string "{p>}" + let paragraph_e () = print_string "{ " ^ u +let bold t a = a ^ "*" ^ t ^ "*" +let italic t a = a ^ "/" ^ t ^ "/" +let underline t a = a ^ "_" ^ t ^ "_" +let inline_monospace t a = a ^ "`" ^ t ^ "`" +let heading_hashbang lvl h a = a ^ String.make lvl '#' ^ h ^ "\n" +let paragraph_s a = a +let paragraph_e a = a +let preformatted s a = a ^ "
" ^ s ^ "
" +let bullet_list_s a = a +let bullet_list_e a = a +let bullet_item_s ch a = a ^ Char.escaped ch +let bullet_item_e a = a ^ "\n" +let ordered_list_s a = a +let ordered_list_e a = a +let ordered_item_s = bullet_item_s +let ordered_item_e = bullet_item_e +let key_value_pair k v a = prerr_endline @@ k ^ "~" ^ v; a + diff --git a/branches/master/converters/html.ml b/branches/master/converters/html.ml new file mode 100644 index 0000000..4a67e86 --- /dev/null +++ b/branches/master/converters/html.ml @@ -0,0 +1,39 @@ +let esc x = + let fn a c = match c with + | '&' -> a ^ "&" + | '<' -> a ^ "<" + | '"' -> a ^ """ + | '\''-> a ^ "'" + | x -> a ^ String.make 1 x + in + Seq.fold_left fn "" (String.to_seq x) + +type t = string +let blank_line a = a ^ "" +let plain_text s a = a ^ esc s +let sentence_s a = a ^ "" +let sentence_e a = a ^ " " +let sentence_segment s a = a ^ esc s ^ " " +let reference_name n a = a ^ {||} ^ esc n ^ "" +let bracketed_referent_s n a = a ^ {||} ^ esc n ^ ": " +let bracketed_referent_e a = a ^ "
" +let angled_uri u a = a ^ {|<|} ^ esc u ^ {|>|} +let bold t a = a ^ "" ^ esc t ^ "" +let italic t a = a ^ "" ^ esc t ^ "" +let underline t a = a ^ "" ^ esc t ^ "" +let inline_monospace t a = a ^ "" ^ esc t ^ "" +let heading_hashbang lvl h a = + let lvl = string_of_int lvl in + a ^ "" ^ esc h ^ "" +let paragraph_s a = a ^ "

" +let paragraph_e a = a ^ "

" +let preformatted s a = a ^ "
" ^ esc s ^ "
" +let bullet_list_s a = a ^ "" +let bullet_item_s _ch a = a ^ "
  • " +let bullet_item_e a = a ^ "
  • " +let ordered_list_s a = a ^ "
      " +let ordered_list_e a = a ^ "
    " +let ordered_item_s = bullet_item_s +let ordered_item_e = bullet_item_e +let key_value k v a = prerr_endline @@ k ^ "~" ^ v; a diff --git a/branches/master/cursor.ml b/branches/master/cursor.ml new file mode 100644 index 0000000..aaa8b5d --- /dev/null +++ b/branches/master/cursor.ml @@ -0,0 +1,22 @@ +type t = { text : string; pos : int; right_boundary : int } + +let overran cursor = cursor.pos >= cursor.right_boundary +let next_char cursor = { cursor with pos = cursor.pos + 1 } +let char_at cur offset = String.get cur.text (cur.pos + offset) +let char cur = String.get cur.text cur.pos +let distance a b = b.pos - a.pos + +let sub ?left ?right cur = { cur with + pos = Option.value left ~default:cur.pos; + right_boundary = Option.value right ~default:cur.right_boundary } + +let unwrap num cur = sub ~left:(cur.pos+num) ~right:(cur.right_boundary-num) cur + +let segment_string cur = String.sub cur.text cur.pos (cur.right_boundary - cur.pos) + +(*todo: reconsider +1 result and type cursor*) +let rec find_end e = function + | cur when cur.pos + 1 = String.length cur.text -> Some cur.pos + | cur when overran cur -> None + | cur when e cur (char cur) -> Some (cur.pos + 1) + | cur -> find_end e (next_char cur) diff --git a/branches/master/dune b/branches/master/dune new file mode 100644 index 0000000..889e5f1 --- /dev/null +++ b/branches/master/dune @@ -0,0 +1,4 @@ +(library + (name text_parse) + (public_name text_parse) + (modules parser syntax cursor)) diff --git a/branches/master/dune-project b/branches/master/dune-project new file mode 100644 index 0000000..8138c5d --- /dev/null +++ b/branches/master/dune-project @@ -0,0 +1,12 @@ +(lang dune 2.4) +(name text_parse) +(version 1.02) + +(license EUPL-1.2) +(maintainers "orbifx ") + +(generate_opam_files true) + +(package + (name text_parse) + (synopsis "Applicative text parsing library for OCaml")) diff --git a/branches/master/parser.ml b/branches/master/parser.ml new file mode 100644 index 0000000..2124048 --- /dev/null +++ b/branches/master/parser.ml @@ -0,0 +1,37 @@ +module type S = sig + include Syntax.S + type t + val parse: Cursor.t -> t -> t +end + +module type Sub_parsers = sig + type t + val subparsers: (module S with type t = t) array +end + +let at s e cur ch = if s cur ch then Cursor.find_end e cur else None + +let apply_default (type a) (module P: S with type t = a) (acc: a) cursor_default cursor = + if cursor_default = cursor then acc + else P.parse (Cursor.sub ~right:(cursor.Cursor.pos) cursor_default) acc + +let rec branch: type a. ?idx:int -> a -> Cursor.t -> Cursor.t -> (module S with type t = a) array -> (a * Cursor.t) = +fun ?idx:(i=1) acc cursor_default cursor syntaxes -> + if Cursor.overran cursor then (apply_default syntaxes.(0) acc cursor_default cursor), cursor + else + try let (module P: S with type t = a) = syntaxes.(i) in + (match at P.s P.e cursor (Cursor.char cursor) with + | Some right -> + let acc = apply_default syntaxes.(0) acc cursor_default cursor in + let acc = P.parse (Cursor.sub ~right cursor) acc in + let cursor = Cursor.sub ~left:right cursor in + branch acc cursor cursor syntaxes + | None | exception Invalid_argument _ -> branch ~idx:(i+1) acc cursor_default cursor syntaxes) + with Invalid_argument _ -> + branch acc cursor_default (Cursor.next_char cursor) syntaxes + +let rec parse subsyntaxes cursor acc = + if Cursor.overran cursor then acc + else + let acc, cursor = branch acc cursor cursor subsyntaxes in + parse subsyntaxes cursor acc diff --git a/branches/master/parsers/blank_line.ml b/branches/master/parsers/blank_line.ml new file mode 100644 index 0000000..6d02f3e --- /dev/null +++ b/branches/master/parsers/blank_line.ml @@ -0,0 +1,11 @@ +module type Fn = sig + type t + val blank_line: t -> t +end + +module Make (F : Fn) = struct + type t = F.t + let s _cur = function '\n' -> true | _ -> false + let e _cur = function '\n' -> true | _ -> false + let parse _cursor acc = F.blank_line acc +end diff --git a/branches/master/parsers/bullet.ml b/branches/master/parsers/bullet.ml new file mode 100644 index 0000000..b6c6f1c --- /dev/null +++ b/branches/master/parsers/bullet.ml @@ -0,0 +1,31 @@ +module type Fn = sig + type t + val bullet_item_s: char -> 'a -> 'a + val bullet_item_e: 'a -> 'a + val bullet_list_s: 'a -> 'a + val bullet_list_e: 'a -> 'a +end + +open Text_parse.Parser +open Text_parse.Syntax +open Text_parse.Cursor + +module Item (F : Fn) = struct + type t = F.t + let s _cursor = function '-' | '+' | '*' -> true | _ -> false + let e cursor _ch = newline (char_at cursor 1) + let subsyntaxes = [||] + let parse cur acc = + let bullet_char = char cur in + let left = match find_end (fun _cur c -> c <> ' ') { cur with pos = cur.pos + 1 } + with Some x -> x-1 | None -> 0 in + F.bullet_item_s bullet_char acc |> parse subsyntaxes (sub ~left cur) |> F.bullet_item_e +end + +module List (F : Fn) = struct + type t = F.t + let s _cursor = function '-' | '+' | '*' -> true | _ -> false + let e cursor _ch = newline (char_at cursor 1) && not (s cursor (char_at cursor 2)) + let subsyntaxes = [| (module Item (F) : Text_parse.Parser.S with type t = F.t) |] + let parse cur acc = F.bullet_list_s acc |> parse subsyntaxes cur |> F.bullet_list_e +end diff --git a/branches/master/parsers/dune b/branches/master/parsers/dune new file mode 100644 index 0000000..ae648ff --- /dev/null +++ b/branches/master/parsers/dune @@ -0,0 +1,4 @@ +(library + (name parsers) + (public_name text_parse.parsers) + (libraries text_parse)) diff --git a/branches/master/parsers/emphasis.ml b/branches/master/parsers/emphasis.ml new file mode 100644 index 0000000..1486c5e --- /dev/null +++ b/branches/master/parsers/emphasis.ml @@ -0,0 +1,32 @@ +module type Fn = sig + val bold: string -> 'a -> 'a + val italic: string -> 'a -> 'a + val underline: string -> 'a -> 'a + val inline_monospace: string -> 'a -> 'a +end + +open Text_parse.Cursor + +module Bold (F : Fn) = struct + let s _cursor = function '*' -> true | _ -> false + let e = s + let parse cur acc = F.bold (segment_string (unwrap 1 cur)) acc +end + +module Italic (F : Fn) = struct + let s _cursor = function '/' -> true | _ -> false + let e = s + let parse cur acc = F.italic (segment_string (unwrap 1 cur)) acc +end + +module Underline (F : Fn) = struct + let s _cursor = function '_' -> true | _ -> false + let e = s + let parse cur acc = F.underline (segment_string (unwrap 1 cur)) acc +end + +module Inline_monospace (F : Fn) = struct + let s _cursor = function '`' -> true | _ -> false + let e = s + let parse cur acc = F.inline_monospace (segment_string (unwrap 1 cur)) acc +end diff --git a/branches/master/parsers/heading.ml b/branches/master/parsers/heading.ml new file mode 100644 index 0000000..8827d7b --- /dev/null +++ b/branches/master/parsers/heading.ml @@ -0,0 +1,17 @@ +module type Fn = sig + type t + val heading_hashbang: int -> string -> t -> t +end + +open Text_parse.Syntax +open Text_parse.Cursor + +module Hashbang (F : Fn) = struct + type t = F.t + let s _cur = function '#' -> true | _ -> false + let e _cur = newline + let parse cursor acc = + let level = match find_end (fun _cur c -> c <> '#') cursor with + Some x -> x - cursor.pos - 1 | None -> 0 in + F.heading_hashbang level (segment_string { cursor with pos = cursor.pos + level + 1; right_boundary = cursor.right_boundary-1 }) acc +end diff --git a/branches/master/parsers/key_value.ml b/branches/master/parsers/key_value.ml new file mode 100644 index 0000000..98d9a87 --- /dev/null +++ b/branches/master/parsers/key_value.ml @@ -0,0 +1,19 @@ +module type Fn = sig + type t + val key_value: string -> string -> t -> t +end + +open Text_parse.Syntax +open Text_parse.Cursor + +module Make (F : Fn) = struct + type t = F.t + let s _cur c = letter c + let e _cur c = newline c + let parse cursor acc = + let colon_pos = match find_end (fun _cur c -> c = ':') cursor with + Some x -> x - cursor.pos - 1 | None -> 0 in (*todo:None shouldn't be allowed by scope*) + let key = segment_string { cursor with right_boundary = cursor.pos+colon_pos } in + let value = segment_string { cursor with pos = cursor.pos+colon_pos+1; right_boundary = cursor.right_boundary } in + F.key_value key value acc +end diff --git a/branches/master/parsers/markdown.ml b/branches/master/parsers/markdown.ml new file mode 100644 index 0000000..4af084b --- /dev/null +++ b/branches/master/parsers/markdown.ml @@ -0,0 +1,27 @@ +module type Markdown_t = sig + include Blank_line.Fn + include Reference.Fn with type t := t + include Bullet.Fn with type t := t + include Ordered.Fn with type t := t + include Heading.Fn with type t := t + include Preformatted.Fn with type t := t + include Paragraph.Fn with type t := t +end + +open Text_parse.Parser +open Text_parse.Cursor + +module Make (F : Markdown_t) = struct + let subsyntaxes = [| + (module Blank_line.Make (F) : Text_parse.Parser.S with type t = F.t); + (module Heading.Hashbang (F)); + (module Reference.Referent (F)); + (module Bullet.List (F)); + (module Ordered.List (F)); + (module Preformatted.Tabbed (F)); + (*(module Paragraph.Make (F));*) + |] + + let of_string text acc = + parse subsyntaxes { text; pos = 0; right_boundary = String.length text - 1 } acc +end diff --git a/branches/master/parsers/ordered.ml b/branches/master/parsers/ordered.ml new file mode 100644 index 0000000..7d2c32e --- /dev/null +++ b/branches/master/parsers/ordered.ml @@ -0,0 +1,37 @@ +module type Fn = sig + type t + val ordered_item_s: char -> 'a -> 'a + val ordered_item_e: 'a -> 'a + val ordered_list_s: 'a -> 'a + val ordered_list_e: 'a -> 'a +end + +open Text_parse.Parser +open Text_parse.Syntax +open Text_parse.Cursor + +module Item (F : Fn) = struct + type t = F.t + let s cur ch = + let is_enum c = (c >= '0' && c <= '9') || (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') in + let is_delim c = c = '.' || c = ')' in + is_enum ch && is_delim (char_at cur 1) + let e cursor _ch = newline (char_at cursor 1) + let subsyntaxes = [||] + let parse cur acc = + let bullet_char = char cur in + let left = match find_end (fun _cur c -> c <> ' ') { cur with pos = cur.pos + 1 } + with Some x -> x | None -> 0 in + F.ordered_item_s bullet_char acc |> parse subsyntaxes (sub ~left cur) |> F.ordered_item_e +end + +module List (F: Fn) = struct + type t = F.t + let s cur ch = + let is_enum c = (c >= '0' && c <= '9') || (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') in + let is_delim c = c = '.' || c = ')' in (*todo: can't have `.` if sentence ends with it and lists are in sense*) + is_enum ch && is_delim (char_at cur 1) + let e cursor _ch = newline (char_at cursor 1) && newline (char_at cursor 2)(* not (s {cursor with pos = cursor.pos+2} (char_at cursor 2)) *) + let subsyntaxes = [| (module Item (F) : Text_parse.Parser.S with type t = F.t) |] + let parse cur acc = F.ordered_list_s acc |> parse subsyntaxes cur |> F.ordered_list_e +end diff --git a/branches/master/parsers/paragraph.ml b/branches/master/parsers/paragraph.ml new file mode 100644 index 0000000..216ee5e --- /dev/null +++ b/branches/master/parsers/paragraph.ml @@ -0,0 +1,19 @@ +module type Fn = sig + type t + val paragraph_s: t -> t + val paragraph_e: t -> t +end + +open Text_parse.Parser +open Text_parse.Syntax +open Text_parse.Cursor + +module Make (F : Fn)(S : Text_parse.Parser.Sub_parsers with type t = F.t) = struct + type t = F.t + let s _cur ch = printable ch + let e cur = function + | '\n' -> char_at cur (-1) = '\n' + | _ when cur.pos + 1 = cur.right_boundary -> true + | _ -> false + let parse cur acc = F.paragraph_s acc |> parse S.subparsers cur |> F.paragraph_e +end diff --git a/branches/master/parsers/plain_text.ml b/branches/master/parsers/plain_text.ml new file mode 100644 index 0000000..59496ea --- /dev/null +++ b/branches/master/parsers/plain_text.ml @@ -0,0 +1,36 @@ +module type Fn = sig + type t + val plain_text: string -> t -> t +end + +open Text_parse.Parser +open Text_parse.Cursor + +module Plain_text (F : Fn) = struct + type t = F.t + let s _cursor _ch = true + let e cursor = function + | '\n' -> char_at cursor (-1) = '\n' + | _ when cursor.pos + 1 = cursor.right_boundary -> true + | _ -> false + let parse cur acc = F.plain_text (segment_string cur) acc +end + +module type Plain_text_t = sig + include Blank_line.Fn + include Heading.Fn with type t := t + include Uri.Fn with type t := t + include Paragraph.Fn with type t := t + include Fn with type t := t +end + + +module Make (F : Plain_text_t) = struct + module P = struct + type t = F.t + let subparsers = [| (module Plain_text (F) : Text_parse.Parser.S with type t = F.t); (module Uri.Angled (F)) |] + end + + let subparsers = [| (module Paragraph.Make (F)(P) : Text_parse.Parser.S with type t = F.t); (module Blank_line.Make (F)); (module Heading.Hashbang (F)); (module Paragraph.Make (F)(P)); |] + let of_string text acc = parse subparsers { text; pos = 0; right_boundary = String.length text - 1 } acc +end diff --git a/branches/master/parsers/preformatted.ml b/branches/master/parsers/preformatted.ml new file mode 100644 index 0000000..80bb795 --- /dev/null +++ b/branches/master/parsers/preformatted.ml @@ -0,0 +1,13 @@ +module type Fn = sig + type t + val tab_preformatted: string -> t -> t +end + +open Text_parse.Cursor + +module Tabbed (F : Fn) = struct + type t = F.t + let s _cur ch = '\t' = ch + let e cur = function '\n' -> not (char_at cur 1 = '\t') | _ -> false + let parse cur acc = F.tab_preformatted (segment_string cur) acc +end diff --git a/branches/master/parsers/reference.ml b/branches/master/parsers/reference.ml new file mode 100644 index 0000000..cf45ced --- /dev/null +++ b/branches/master/parsers/reference.ml @@ -0,0 +1,31 @@ +module type Fn = sig + type t + val reference_name: string -> string -> t -> t + val referent_s: string -> t -> t + val referent_e: t -> t +end + +open Text_parse.Parser +open Text_parse.Cursor +open Text_parse.Syntax + +module Name (F : Fn) = struct + type t = F.t + let s _cursor = function '[' -> true | _ -> false + let e _cursor = function ']' -> true | _ -> false + let parse cur acc = F.reference_name (segment_string (unwrap 1 cur)) acc +end + +module Referent (F : Fn) = struct + type t = F.t + let find_name_end = find_end (fun cur c -> c = ']' && (char_at cur 1) = ':') + let s cur = function '[' -> Option.is_some (find_name_end cur) | _ -> false + let e _cur = newline + let subsyntaxes = [| |] + let parse cur acc = + let name_boundary = match find_name_end cur with Some x -> x | None -> 0 in + let name = segment_string { cur with pos = cur.pos+1; right_boundary = name_boundary-1 } in + let text_cur = { cur with pos = name_boundary+2 } in + F.referent_s name acc |> parse subsyntaxes text_cur |> F.referent_e +end + diff --git a/branches/master/parsers/sentence.ml b/branches/master/parsers/sentence.ml new file mode 100644 index 0000000..cb006ac --- /dev/null +++ b/branches/master/parsers/sentence.ml @@ -0,0 +1,37 @@ +module type Fn = sig + val sentence_segment: string -> 'a -> 'a + val sentence_s: 'a -> 'a + val sentence_e: 'a -> 'a +end + +open Text_parse.Parser +open Text_parse.Syntax +open Text_parse.Cursor + +module Sentence_segment (F : Fn) = struct + let s _cursor = printable + let e cursor = function + | '.' -> char_at cursor 1 = ' ' || newline (char_at cursor 1) (* todo punctuations *) + | '\n' -> char_at cursor 1 = '\n' + | _ when cursor.pos + 1 = cursor.right_boundary -> true + | _ when char_at cursor 1 = '[' -> true + | _ when char_at cursor 1 = '*' -> true + | _ when char_at cursor 1 = '_' -> true + | _ when char_at cursor 1 = '/' -> true + | _ when char_at cursor 1 = '`' -> true + | _ when char_at cursor 1 = '<' -> true + | _ -> false + let at = at s e + let parse cur acc = F.sentence_segment (segment_string cur) acc +end + +module Sentence (F : Fn) = struct + let s _cursor = printable + let e cursor = function + | '.' -> char_at cursor 1 = ' ' (* todo punctuations *) + | '\n' -> char_at cursor 1 = '\n' + | _ -> false + let at = at s e + let subsyntaxes = [| |] + let parse cur acc = F.sentence_s acc |> parse subsyntaxes cur |> F.sentence_e +end diff --git a/branches/master/parsers/uri.ml b/branches/master/parsers/uri.ml new file mode 100644 index 0000000..5ed6cc0 --- /dev/null +++ b/branches/master/parsers/uri.ml @@ -0,0 +1,27 @@ +module type Fn = sig + type t + val angled_uri: string -> t -> t +end + +open Text_parse.Syntax +open Text_parse.Cursor + +module Angled (F : Fn) = struct + type t = F.t + let s cur = function '<' -> let c = char_at cur 1 in letter c || digit c | _ -> false + let e _cur = function '>' -> true | _ -> false + let parse cur acc = F.angled_uri (segment_string (unwrap 1 cur)) acc +end + +(* module Uri (F : TextFn) = struct + * type t = F.t + * let rec is_scheme cur = function + * | ':' -> true + * | ch when letter ch -> is_scheme (next_char cur) (char_at cur 1) + * | _ -> false + * let s cur ch = letter ch && is_scheme (next_char cur) (char_at cur 1) + * let e cur _ch = match char_at cur 1 with '\n' | ' ' -> true | _ -> false + * let at = at s e + * let parse cur acc = F.angled_uri (segment_string cur) acc + * end *) + diff --git a/branches/master/syntax.ml b/branches/master/syntax.ml new file mode 100644 index 0000000..cf059a4 --- /dev/null +++ b/branches/master/syntax.ml @@ -0,0 +1,11 @@ +module type S = sig + val s: Cursor.t -> char -> bool + val e: Cursor.t -> char -> bool +end + +(*let str c = String.make 1 c*) + +let newline = function '\n' -> true | _ -> false +let printable ch = ch >= ' ' && ch <= '~' +let letter ch = (ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z') +let digit = function '0' .. '9' -> true | _ -> false diff --git a/branches/master/text_parse.opam b/branches/master/text_parse.opam new file mode 100644 index 0000000..382c6e2 --- /dev/null +++ b/branches/master/text_parse.opam @@ -0,0 +1,23 @@ +# This file is generated by dune, edit dune-project instead +opam-version: "2.0" +version: "1.02" +synopsis: "Applicative text parsing library for OCaml" +maintainer: ["orbifx "] +license: "EUPL-1.2" +depends: [ + "dune" {>= "2.4"} +] +build: [ + ["dune" "subst"] {pinned} + [ + "dune" + "build" + "-p" + name + "-j" + jobs + "@install" + "@runtest" {with-test} + "@doc" {with-doc} + ] +] diff --git a/branches/origin-master/.gitignore b/branches/origin-master/.gitignore new file mode 100644 index 0000000..b6116b1 --- /dev/null +++ b/branches/origin-master/.gitignore @@ -0,0 +1,3 @@ +_build +*.txt +*.merlin diff --git a/branches/origin-master/LICENSE b/branches/origin-master/LICENSE new file mode 100644 index 0000000..fa3348e --- /dev/null +++ b/branches/origin-master/LICENSE @@ -0,0 +1,153 @@ +EUROPEAN UNION PUBLIC LICENCE v. 1.2 +EUPL © the European Union 2007, 2016 + +This European Union Public Licence (the ‘EUPL’) applies to the Work (as defined below) which is provided under the terms of this Licence. Any use of the Work, other than as authorised under this Licence is prohibited (to the extent such use is covered by a right of the copyright holder of the Work). + +The Work is provided under the terms of this Licence when the Licensor (as defined below) has placed the following notice immediately following the copyright notice for the Work: + +Licensed under the EUPL + +or has expressed by any other means his willingness to license under the EUPL. + +1. Definitions +In this Licence, the following terms have the following meaning: + +— ‘The Licence’: this Licence. + +— ‘The Original Work’: the work or software distributed or communicated by the Licensor under this Licence, available as Source Code and also as Executable Code as the case may be. + +— ‘Derivative Works’: the works or software that could be created by the Licensee, based upon the Original Work or modifications thereof. This Licence does not define the extent of modification or dependence on the Original Work required in order to classify a work as a Derivative Work; this extent is determined by copyright law applicable in the country mentioned in Article 15. + +— ‘The Work’: the Original Work or its Derivative Works. + +— ‘The Source Code’: the human-readable form of the Work which is the most convenient for people to study and modify. + +— ‘The Executable Code’: any code which has generally been compiled and which is meant to be interpreted by a computer as a program. + +— ‘The Licensor’: the natural or legal person that distributes or communicates the Work under the Licence. + +— ‘Contributor(s)’: any natural or legal person who modifies the Work under the Licence, or otherwise contributes to the creation of a Derivative Work. + +— ‘The Licensee’ or ‘You’: any natural or legal person who makes any usage of the Work under the terms of the Licence. + +— ‘Distribution’ or ‘Communication’: any act of selling, giving, lending, renting, distributing, communicating, transmitting, or otherwise making available, online or offline, copies of the Work or providing access to its essential functionalities at the disposal of any other natural or legal person. + +2. Scope of the rights granted by the Licence +The Licensor hereby grants You a worldwide, royalty-free, non-exclusive, sublicensable licence to do the following, for the duration of copyright vested in the Original Work: + +— use the Work in any circumstance and for all usage, + +— reproduce the Work, + +— modify the Work, and make Derivative Works based upon the Work, + +— communicate to the public, including the right to make available or display the Work or copies thereof to the public and perform publicly, as the case may be, the Work, + +— distribute the Work or copies thereof, + +— lend and rent the Work or copies thereof, + +— sublicense rights in the Work or copies thereof. + +Those rights can be exercised on any media, supports and formats, whether now known or later invented, as far as the applicable law permits so. + +In the countries where moral rights apply, the Licensor waives his right to exercise his moral right to the extent allowed by law in order to make effective the licence of the economic rights here above listed. + +The Licensor grants to the Licensee royalty-free, non-exclusive usage rights to any patents held by the Licensor, to the extent necessary to make use of the rights granted on the Work under this Licence. + +3. Communication of the Source Code +The Licensor may provide the Work either in its Source Code form, or as Executable Code. If the Work is provided as Executable Code, the Licensor provides in addition a machine-readable copy of the Source Code of the Work along with each copy of the Work that the Licensor distributes or indicates, in a notice following the copyright notice attached to the Work, a repository where the Source Code is easily and freely accessible for as long as the Licensor continues to distribute or communicate the Work. + +4. Limitations on copyright +Nothing in this Licence is intended to deprive the Licensee of the benefits from any exception or limitation to the exclusive rights of the rights owners in the Work, of the exhaustion of those rights or of other applicable limitations thereto. + +5. Obligations of the Licensee +The grant of the rights mentioned above is subject to some restrictions and obligations imposed on the Licensee. Those obligations are the following: + +Attribution right: The Licensee shall keep intact all copyright, patent or trademarks notices and all notices that refer to the Licence and to the disclaimer of warranties. The Licensee must include a copy of such notices and a copy of the Licence with every copy of the Work he/she distributes or communicates. The Licensee must cause any Derivative Work to carry prominent notices stating that the Work has been modified and the date of modification. +Copyleft clause: If the Licensee distributes or communicates copies of the Original Works or Derivative Works, this Distribution or Communication will be done under the terms of this Licence or of a later version of this Licence unless the Original Work is expressly distributed only under this version of the Licence — for example by communicating ‘EUPL v. 1.2 only’. The Licensee (becoming Licensor) cannot offer or impose any additional terms or conditions on the Work or Derivative Work that alter or restrict the terms of the Licence. +Compatibility clause: If the Licensee Distributes or Communicates Derivative Works or copies thereof based upon both the Work and another work licensed under a Compatible Licence, this Distribution or Communication can be done under the terms of this Compatible Licence. For the sake of this clause, ‘Compatible Licence’ refers to the licences listed in the appendix attached to this Licence. Should the Licensee's obligations under the Compatible Licence conflict with his/her obligations under this Licence, the obligations of the Compatible Licence shall prevail. +Provision of Source Code: When distributing or communicating copies of the Work, the Licensee will provide a machine-readable copy of the Source Code or indicate a repository where this Source will be easily and freely available for as long as the Licensee continues to distribute or communicate the Work. +Legal Protection: This Licence does not grant permission to use the trade names, trademarks, service marks, or names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the copyright notice. +6. Chain of Authorship +The original Licensor warrants that the copyright in the Original Work granted hereunder is owned by him/her or licensed to him/her and that he/she has the power and authority to grant the Licence. + +Each Contributor warrants that the copyright in the modifications he/she brings to the Work are owned by him/her or licensed to him/her and that he/she has the power and authority to grant the Licence. + +Each time You accept the Licence, the original Licensor and subsequent Contributors grant You a licence to their contri butions to the Work, under the terms of this Licence. + +7. Disclaimer of Warranty +The Work is a work in progress, which is continuously improved by numerous Contributors. It is not a finished work and may therefore contain defects or ‘bugs’ inherent to this type of development. + +For the above reason, the Work is provided under the Licence on an ‘as is’ basis and without warranties of any kind concerning the Work, including without limitation merchantability, fitness for a particular purpose, absence of defects or errors, accuracy, non-infringement of intellectual property rights other than copyright as stated in Article 6 of this Licence. + +This disclaimer of warranty is an essential part of the Licence and a condition for the grant of any rights to the Work. + +8. Disclaimer of Liability +Except in the cases of wilful misconduct or damages directly caused to natural persons, the Licensor will in no event be liable for any direct or indirect, material or moral, damages of any kind, arising out of the Licence or of the use of the Work, including without limitation, damages for loss of goodwill, work stoppage, computer failure or malfunction, loss of data or any commercial damage, even if the Licensor has been advised of the possibility of such damage. However, the Licensor will be liable under statutory product liability laws as far such laws apply to the Work. + +9. Additional agreements +While distributing the Work, You may choose to conclude an additional agreement, defining obligations or services consistent with this Licence. However, if accepting obligations, You may act only on your own behalf and on your sole responsibility, not on behalf of the original Licensor or any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against such Contributor by the fact You have accepted any warranty or additional liability. + +10. Acceptance of the Licence +The provisions of this Licence can be accepted by clicking on an icon ‘I agree’ placed under the bottom of a window displaying the text of this Licence or by affirming consent in any other similar way, in accordance with the rules of applicable law. Clicking on that icon indicates your clear and irrevocable acceptance of this Licence and all of its terms and conditions. + +Similarly, you irrevocably accept this Licence and all of its terms and conditions by exercising any rights granted to You by Article 2 of this Licence, such as the use of the Work, the creation by You of a Derivative Work or the Distribution or Communication by You of the Work or copies thereof. + +11. Information to the public +In case of any Distribution or Communication of the Work by means of electronic communication by You (for example, by offering to download the Work from a remote location) the distribution channel or media (for example, a website) must at least provide to the public the information requested by the applicable law regarding the Licensor, the Licence and the way it may be accessible, concluded, stored and reproduced by the Licensee. + +12. Termination of the Licence +The Licence and the rights granted hereunder will terminate automatically upon any breach by the Licensee of the terms of the Licence. + +Such a termination will not terminate the licences of any person who has received the Work from the Licensee under the Licence, provided such persons remain in full compliance with the Licence. + +13. Miscellaneous +Without prejudice of Article 9 above, the Licence represents the complete agreement between the Parties as to the Work. + +If any provision of the Licence is invalid or unenforceable under applicable law, this will not affect the validity or enforceability of the Licence as a whole. Such provision will be construed or reformed so as necessary to make it valid and enforceable. + +The European Commission may publish other linguistic versions or new versions of this Licence or updated versions of the Appendix, so far this is required and reasonable, without reducing the scope of the rights granted by the Licence. New versions of the Licence will be published with a unique version number. + +All linguistic versions of this Licence, approved by the European Commission, have identical value. Parties can take advantage of the linguistic version of their choice. + +14. Jurisdiction +Without prejudice to specific agreement between parties, + +— any litigation resulting from the interpretation of this License, arising between the European Union institutions, bodies, offices or agencies, as a Licensor, and any Licensee, will be subject to the jurisdiction of the Court of Justice of the European Union, as laid down in article 272 of the Treaty on the Functioning of the European Union, + +— any litigation arising between other parties and resulting from the interpretation of this License, will be subject to the exclusive jurisdiction of the competent court where the Licensor resides or conducts its primary business. + +15. Applicable Law +Without prejudice to specific agreement between parties, + +— this Licence shall be governed by the law of the European Union Member State where the Licensor has his seat, resides or has his registered office, + +— this licence shall be governed by Belgian law if the Licensor has no seat, residence or registered office inside a European Union Member State. + +Appendix +‘Compatible Licences’ according to Article 5 EUPL are: + +— GNU General Public License (GPL) v. 2, v. 3 + +— GNU Affero General Public License (AGPL) v. 3 + +— Open Software License (OSL) v. 2.1, v. 3.0 + +— Eclipse Public License (EPL) v. 1.0 + +— CeCILL v. 2.0, v. 2.1 + +— Mozilla Public Licence (MPL) v. 2 + +— GNU Lesser General Public Licence (LGPL) v. 2.1, v. 3 + +— Creative Commons Attribution-ShareAlike v. 3.0 Unported (CC BY-SA 3.0) for works other than software + +— European Union Public Licence (EUPL) v. 1.1, v. 1.2 + +— Québec Free and Open-Source Licence — Reciprocity (LiLiQ-R) or Strong Reciprocity (LiLiQ-R+) + +The European Commission may update this Appendix to later versions of the above licences without producing a new version of the EUPL, as long as they provide the rights granted in Article 2 of this Licence and protect the covered Source Code from exclusive appropriation. + +All other changes or additions to this Appendix require the production of a new EUPL version. diff --git a/branches/origin-master/bin/cli.ml b/branches/origin-master/bin/cli.ml new file mode 100644 index 0000000..653d202 --- /dev/null +++ b/branches/origin-master/bin/cli.ml @@ -0,0 +1,23 @@ +module Test = struct + type t = unit + let blank_line () = print_string "{bl}" + let angled_uri s () = print_string ("{>}" ^ s ^ "{<}") + let plain_text s () = print_string s + let heading_hashbang i s () = print_string (string_of_int i ^ s) + let paragraph_s () = print_string "{p>}" + let paragraph_e () = print_string "{ " ^ u +let bold t a = a ^ "*" ^ t ^ "*" +let italic t a = a ^ "/" ^ t ^ "/" +let underline t a = a ^ "_" ^ t ^ "_" +let inline_monospace t a = a ^ "`" ^ t ^ "`" +let heading_hashbang lvl h a = a ^ String.make lvl '#' ^ h ^ "\n" +let paragraph_s a = a +let paragraph_e a = a +let preformatted s a = a ^ "
    " ^ s ^ "
    " +let bullet_list_s a = a +let bullet_list_e a = a +let bullet_item_s ch a = a ^ Char.escaped ch +let bullet_item_e a = a ^ "\n" +let ordered_list_s a = a +let ordered_list_e a = a +let ordered_item_s = bullet_item_s +let ordered_item_e = bullet_item_e +let key_value_pair k v a = prerr_endline @@ k ^ "~" ^ v; a + diff --git a/branches/origin-master/converters/html.ml b/branches/origin-master/converters/html.ml new file mode 100644 index 0000000..4a67e86 --- /dev/null +++ b/branches/origin-master/converters/html.ml @@ -0,0 +1,39 @@ +let esc x = + let fn a c = match c with + | '&' -> a ^ "&" + | '<' -> a ^ "<" + | '"' -> a ^ """ + | '\''-> a ^ "'" + | x -> a ^ String.make 1 x + in + Seq.fold_left fn "" (String.to_seq x) + +type t = string +let blank_line a = a ^ "" +let plain_text s a = a ^ esc s +let sentence_s a = a ^ "" +let sentence_e a = a ^ " " +let sentence_segment s a = a ^ esc s ^ " " +let reference_name n a = a ^ {||} ^ esc n ^ "" +let bracketed_referent_s n a = a ^ {||} ^ esc n ^ ": " +let bracketed_referent_e a = a ^ "
    " +let angled_uri u a = a ^ {|<|} ^ esc u ^ {|>|} +let bold t a = a ^ "" ^ esc t ^ "" +let italic t a = a ^ "" ^ esc t ^ "" +let underline t a = a ^ "" ^ esc t ^ "" +let inline_monospace t a = a ^ "" ^ esc t ^ "" +let heading_hashbang lvl h a = + let lvl = string_of_int lvl in + a ^ "" ^ esc h ^ "" +let paragraph_s a = a ^ "

    " +let paragraph_e a = a ^ "

    " +let preformatted s a = a ^ "
    " ^ esc s ^ "
    " +let bullet_list_s a = a ^ "
      " +let bullet_list_e a = a ^ "
    " +let bullet_item_s _ch a = a ^ "
  • " +let bullet_item_e a = a ^ "
  • " +let ordered_list_s a = a ^ "
      " +let ordered_list_e a = a ^ "
    " +let ordered_item_s = bullet_item_s +let ordered_item_e = bullet_item_e +let key_value k v a = prerr_endline @@ k ^ "~" ^ v; a diff --git a/branches/origin-master/cursor.ml b/branches/origin-master/cursor.ml new file mode 100644 index 0000000..aaa8b5d --- /dev/null +++ b/branches/origin-master/cursor.ml @@ -0,0 +1,22 @@ +type t = { text : string; pos : int; right_boundary : int } + +let overran cursor = cursor.pos >= cursor.right_boundary +let next_char cursor = { cursor with pos = cursor.pos + 1 } +let char_at cur offset = String.get cur.text (cur.pos + offset) +let char cur = String.get cur.text cur.pos +let distance a b = b.pos - a.pos + +let sub ?left ?right cur = { cur with + pos = Option.value left ~default:cur.pos; + right_boundary = Option.value right ~default:cur.right_boundary } + +let unwrap num cur = sub ~left:(cur.pos+num) ~right:(cur.right_boundary-num) cur + +let segment_string cur = String.sub cur.text cur.pos (cur.right_boundary - cur.pos) + +(*todo: reconsider +1 result and type cursor*) +let rec find_end e = function + | cur when cur.pos + 1 = String.length cur.text -> Some cur.pos + | cur when overran cur -> None + | cur when e cur (char cur) -> Some (cur.pos + 1) + | cur -> find_end e (next_char cur) diff --git a/branches/origin-master/dune b/branches/origin-master/dune new file mode 100644 index 0000000..889e5f1 --- /dev/null +++ b/branches/origin-master/dune @@ -0,0 +1,4 @@ +(library + (name text_parse) + (public_name text_parse) + (modules parser syntax cursor)) diff --git a/branches/origin-master/dune-project b/branches/origin-master/dune-project new file mode 100644 index 0000000..8138c5d --- /dev/null +++ b/branches/origin-master/dune-project @@ -0,0 +1,12 @@ +(lang dune 2.4) +(name text_parse) +(version 1.02) + +(license EUPL-1.2) +(maintainers "orbifx ") + +(generate_opam_files true) + +(package + (name text_parse) + (synopsis "Applicative text parsing library for OCaml")) diff --git a/branches/origin-master/parser.ml b/branches/origin-master/parser.ml new file mode 100644 index 0000000..2124048 --- /dev/null +++ b/branches/origin-master/parser.ml @@ -0,0 +1,37 @@ +module type S = sig + include Syntax.S + type t + val parse: Cursor.t -> t -> t +end + +module type Sub_parsers = sig + type t + val subparsers: (module S with type t = t) array +end + +let at s e cur ch = if s cur ch then Cursor.find_end e cur else None + +let apply_default (type a) (module P: S with type t = a) (acc: a) cursor_default cursor = + if cursor_default = cursor then acc + else P.parse (Cursor.sub ~right:(cursor.Cursor.pos) cursor_default) acc + +let rec branch: type a. ?idx:int -> a -> Cursor.t -> Cursor.t -> (module S with type t = a) array -> (a * Cursor.t) = +fun ?idx:(i=1) acc cursor_default cursor syntaxes -> + if Cursor.overran cursor then (apply_default syntaxes.(0) acc cursor_default cursor), cursor + else + try let (module P: S with type t = a) = syntaxes.(i) in + (match at P.s P.e cursor (Cursor.char cursor) with + | Some right -> + let acc = apply_default syntaxes.(0) acc cursor_default cursor in + let acc = P.parse (Cursor.sub ~right cursor) acc in + let cursor = Cursor.sub ~left:right cursor in + branch acc cursor cursor syntaxes + | None | exception Invalid_argument _ -> branch ~idx:(i+1) acc cursor_default cursor syntaxes) + with Invalid_argument _ -> + branch acc cursor_default (Cursor.next_char cursor) syntaxes + +let rec parse subsyntaxes cursor acc = + if Cursor.overran cursor then acc + else + let acc, cursor = branch acc cursor cursor subsyntaxes in + parse subsyntaxes cursor acc diff --git a/branches/origin-master/parsers/blank_line.ml b/branches/origin-master/parsers/blank_line.ml new file mode 100644 index 0000000..6d02f3e --- /dev/null +++ b/branches/origin-master/parsers/blank_line.ml @@ -0,0 +1,11 @@ +module type Fn = sig + type t + val blank_line: t -> t +end + +module Make (F : Fn) = struct + type t = F.t + let s _cur = function '\n' -> true | _ -> false + let e _cur = function '\n' -> true | _ -> false + let parse _cursor acc = F.blank_line acc +end diff --git a/branches/origin-master/parsers/bullet.ml b/branches/origin-master/parsers/bullet.ml new file mode 100644 index 0000000..b6c6f1c --- /dev/null +++ b/branches/origin-master/parsers/bullet.ml @@ -0,0 +1,31 @@ +module type Fn = sig + type t + val bullet_item_s: char -> 'a -> 'a + val bullet_item_e: 'a -> 'a + val bullet_list_s: 'a -> 'a + val bullet_list_e: 'a -> 'a +end + +open Text_parse.Parser +open Text_parse.Syntax +open Text_parse.Cursor + +module Item (F : Fn) = struct + type t = F.t + let s _cursor = function '-' | '+' | '*' -> true | _ -> false + let e cursor _ch = newline (char_at cursor 1) + let subsyntaxes = [||] + let parse cur acc = + let bullet_char = char cur in + let left = match find_end (fun _cur c -> c <> ' ') { cur with pos = cur.pos + 1 } + with Some x -> x-1 | None -> 0 in + F.bullet_item_s bullet_char acc |> parse subsyntaxes (sub ~left cur) |> F.bullet_item_e +end + +module List (F : Fn) = struct + type t = F.t + let s _cursor = function '-' | '+' | '*' -> true | _ -> false + let e cursor _ch = newline (char_at cursor 1) && not (s cursor (char_at cursor 2)) + let subsyntaxes = [| (module Item (F) : Text_parse.Parser.S with type t = F.t) |] + let parse cur acc = F.bullet_list_s acc |> parse subsyntaxes cur |> F.bullet_list_e +end diff --git a/branches/origin-master/parsers/dune b/branches/origin-master/parsers/dune new file mode 100644 index 0000000..ae648ff --- /dev/null +++ b/branches/origin-master/parsers/dune @@ -0,0 +1,4 @@ +(library + (name parsers) + (public_name text_parse.parsers) + (libraries text_parse)) diff --git a/branches/origin-master/parsers/emphasis.ml b/branches/origin-master/parsers/emphasis.ml new file mode 100644 index 0000000..1486c5e --- /dev/null +++ b/branches/origin-master/parsers/emphasis.ml @@ -0,0 +1,32 @@ +module type Fn = sig + val bold: string -> 'a -> 'a + val italic: string -> 'a -> 'a + val underline: string -> 'a -> 'a + val inline_monospace: string -> 'a -> 'a +end + +open Text_parse.Cursor + +module Bold (F : Fn) = struct + let s _cursor = function '*' -> true | _ -> false + let e = s + let parse cur acc = F.bold (segment_string (unwrap 1 cur)) acc +end + +module Italic (F : Fn) = struct + let s _cursor = function '/' -> true | _ -> false + let e = s + let parse cur acc = F.italic (segment_string (unwrap 1 cur)) acc +end + +module Underline (F : Fn) = struct + let s _cursor = function '_' -> true | _ -> false + let e = s + let parse cur acc = F.underline (segment_string (unwrap 1 cur)) acc +end + +module Inline_monospace (F : Fn) = struct + let s _cursor = function '`' -> true | _ -> false + let e = s + let parse cur acc = F.inline_monospace (segment_string (unwrap 1 cur)) acc +end diff --git a/branches/origin-master/parsers/heading.ml b/branches/origin-master/parsers/heading.ml new file mode 100644 index 0000000..8827d7b --- /dev/null +++ b/branches/origin-master/parsers/heading.ml @@ -0,0 +1,17 @@ +module type Fn = sig + type t + val heading_hashbang: int -> string -> t -> t +end + +open Text_parse.Syntax +open Text_parse.Cursor + +module Hashbang (F : Fn) = struct + type t = F.t + let s _cur = function '#' -> true | _ -> false + let e _cur = newline + let parse cursor acc = + let level = match find_end (fun _cur c -> c <> '#') cursor with + Some x -> x - cursor.pos - 1 | None -> 0 in + F.heading_hashbang level (segment_string { cursor with pos = cursor.pos + level + 1; right_boundary = cursor.right_boundary-1 }) acc +end diff --git a/branches/origin-master/parsers/key_value.ml b/branches/origin-master/parsers/key_value.ml new file mode 100644 index 0000000..98d9a87 --- /dev/null +++ b/branches/origin-master/parsers/key_value.ml @@ -0,0 +1,19 @@ +module type Fn = sig + type t + val key_value: string -> string -> t -> t +end + +open Text_parse.Syntax +open Text_parse.Cursor + +module Make (F : Fn) = struct + type t = F.t + let s _cur c = letter c + let e _cur c = newline c + let parse cursor acc = + let colon_pos = match find_end (fun _cur c -> c = ':') cursor with + Some x -> x - cursor.pos - 1 | None -> 0 in (*todo:None shouldn't be allowed by scope*) + let key = segment_string { cursor with right_boundary = cursor.pos+colon_pos } in + let value = segment_string { cursor with pos = cursor.pos+colon_pos+1; right_boundary = cursor.right_boundary } in + F.key_value key value acc +end diff --git a/branches/origin-master/parsers/markdown.ml b/branches/origin-master/parsers/markdown.ml new file mode 100644 index 0000000..4af084b --- /dev/null +++ b/branches/origin-master/parsers/markdown.ml @@ -0,0 +1,27 @@ +module type Markdown_t = sig + include Blank_line.Fn + include Reference.Fn with type t := t + include Bullet.Fn with type t := t + include Ordered.Fn with type t := t + include Heading.Fn with type t := t + include Preformatted.Fn with type t := t + include Paragraph.Fn with type t := t +end + +open Text_parse.Parser +open Text_parse.Cursor + +module Make (F : Markdown_t) = struct + let subsyntaxes = [| + (module Blank_line.Make (F) : Text_parse.Parser.S with type t = F.t); + (module Heading.Hashbang (F)); + (module Reference.Referent (F)); + (module Bullet.List (F)); + (module Ordered.List (F)); + (module Preformatted.Tabbed (F)); + (*(module Paragraph.Make (F));*) + |] + + let of_string text acc = + parse subsyntaxes { text; pos = 0; right_boundary = String.length text - 1 } acc +end diff --git a/branches/origin-master/parsers/ordered.ml b/branches/origin-master/parsers/ordered.ml new file mode 100644 index 0000000..7d2c32e --- /dev/null +++ b/branches/origin-master/parsers/ordered.ml @@ -0,0 +1,37 @@ +module type Fn = sig + type t + val ordered_item_s: char -> 'a -> 'a + val ordered_item_e: 'a -> 'a + val ordered_list_s: 'a -> 'a + val ordered_list_e: 'a -> 'a +end + +open Text_parse.Parser +open Text_parse.Syntax +open Text_parse.Cursor + +module Item (F : Fn) = struct + type t = F.t + let s cur ch = + let is_enum c = (c >= '0' && c <= '9') || (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') in + let is_delim c = c = '.' || c = ')' in + is_enum ch && is_delim (char_at cur 1) + let e cursor _ch = newline (char_at cursor 1) + let subsyntaxes = [||] + let parse cur acc = + let bullet_char = char cur in + let left = match find_end (fun _cur c -> c <> ' ') { cur with pos = cur.pos + 1 } + with Some x -> x | None -> 0 in + F.ordered_item_s bullet_char acc |> parse subsyntaxes (sub ~left cur) |> F.ordered_item_e +end + +module List (F: Fn) = struct + type t = F.t + let s cur ch = + let is_enum c = (c >= '0' && c <= '9') || (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') in + let is_delim c = c = '.' || c = ')' in (*todo: can't have `.` if sentence ends with it and lists are in sense*) + is_enum ch && is_delim (char_at cur 1) + let e cursor _ch = newline (char_at cursor 1) && newline (char_at cursor 2)(* not (s {cursor with pos = cursor.pos+2} (char_at cursor 2)) *) + let subsyntaxes = [| (module Item (F) : Text_parse.Parser.S with type t = F.t) |] + let parse cur acc = F.ordered_list_s acc |> parse subsyntaxes cur |> F.ordered_list_e +end diff --git a/branches/origin-master/parsers/paragraph.ml b/branches/origin-master/parsers/paragraph.ml new file mode 100644 index 0000000..216ee5e --- /dev/null +++ b/branches/origin-master/parsers/paragraph.ml @@ -0,0 +1,19 @@ +module type Fn = sig + type t + val paragraph_s: t -> t + val paragraph_e: t -> t +end + +open Text_parse.Parser +open Text_parse.Syntax +open Text_parse.Cursor + +module Make (F : Fn)(S : Text_parse.Parser.Sub_parsers with type t = F.t) = struct + type t = F.t + let s _cur ch = printable ch + let e cur = function + | '\n' -> char_at cur (-1) = '\n' + | _ when cur.pos + 1 = cur.right_boundary -> true + | _ -> false + let parse cur acc = F.paragraph_s acc |> parse S.subparsers cur |> F.paragraph_e +end diff --git a/branches/origin-master/parsers/plain_text.ml b/branches/origin-master/parsers/plain_text.ml new file mode 100644 index 0000000..59496ea --- /dev/null +++ b/branches/origin-master/parsers/plain_text.ml @@ -0,0 +1,36 @@ +module type Fn = sig + type t + val plain_text: string -> t -> t +end + +open Text_parse.Parser +open Text_parse.Cursor + +module Plain_text (F : Fn) = struct + type t = F.t + let s _cursor _ch = true + let e cursor = function + | '\n' -> char_at cursor (-1) = '\n' + | _ when cursor.pos + 1 = cursor.right_boundary -> true + | _ -> false + let parse cur acc = F.plain_text (segment_string cur) acc +end + +module type Plain_text_t = sig + include Blank_line.Fn + include Heading.Fn with type t := t + include Uri.Fn with type t := t + include Paragraph.Fn with type t := t + include Fn with type t := t +end + + +module Make (F : Plain_text_t) = struct + module P = struct + type t = F.t + let subparsers = [| (module Plain_text (F) : Text_parse.Parser.S with type t = F.t); (module Uri.Angled (F)) |] + end + + let subparsers = [| (module Paragraph.Make (F)(P) : Text_parse.Parser.S with type t = F.t); (module Blank_line.Make (F)); (module Heading.Hashbang (F)); (module Paragraph.Make (F)(P)); |] + let of_string text acc = parse subparsers { text; pos = 0; right_boundary = String.length text - 1 } acc +end diff --git a/branches/origin-master/parsers/preformatted.ml b/branches/origin-master/parsers/preformatted.ml new file mode 100644 index 0000000..80bb795 --- /dev/null +++ b/branches/origin-master/parsers/preformatted.ml @@ -0,0 +1,13 @@ +module type Fn = sig + type t + val tab_preformatted: string -> t -> t +end + +open Text_parse.Cursor + +module Tabbed (F : Fn) = struct + type t = F.t + let s _cur ch = '\t' = ch + let e cur = function '\n' -> not (char_at cur 1 = '\t') | _ -> false + let parse cur acc = F.tab_preformatted (segment_string cur) acc +end diff --git a/branches/origin-master/parsers/reference.ml b/branches/origin-master/parsers/reference.ml new file mode 100644 index 0000000..cf45ced --- /dev/null +++ b/branches/origin-master/parsers/reference.ml @@ -0,0 +1,31 @@ +module type Fn = sig + type t + val reference_name: string -> string -> t -> t + val referent_s: string -> t -> t + val referent_e: t -> t +end + +open Text_parse.Parser +open Text_parse.Cursor +open Text_parse.Syntax + +module Name (F : Fn) = struct + type t = F.t + let s _cursor = function '[' -> true | _ -> false + let e _cursor = function ']' -> true | _ -> false + let parse cur acc = F.reference_name (segment_string (unwrap 1 cur)) acc +end + +module Referent (F : Fn) = struct + type t = F.t + let find_name_end = find_end (fun cur c -> c = ']' && (char_at cur 1) = ':') + let s cur = function '[' -> Option.is_some (find_name_end cur) | _ -> false + let e _cur = newline + let subsyntaxes = [| |] + let parse cur acc = + let name_boundary = match find_name_end cur with Some x -> x | None -> 0 in + let name = segment_string { cur with pos = cur.pos+1; right_boundary = name_boundary-1 } in + let text_cur = { cur with pos = name_boundary+2 } in + F.referent_s name acc |> parse subsyntaxes text_cur |> F.referent_e +end + diff --git a/branches/origin-master/parsers/sentence.ml b/branches/origin-master/parsers/sentence.ml new file mode 100644 index 0000000..cb006ac --- /dev/null +++ b/branches/origin-master/parsers/sentence.ml @@ -0,0 +1,37 @@ +module type Fn = sig + val sentence_segment: string -> 'a -> 'a + val sentence_s: 'a -> 'a + val sentence_e: 'a -> 'a +end + +open Text_parse.Parser +open Text_parse.Syntax +open Text_parse.Cursor + +module Sentence_segment (F : Fn) = struct + let s _cursor = printable + let e cursor = function + | '.' -> char_at cursor 1 = ' ' || newline (char_at cursor 1) (* todo punctuations *) + | '\n' -> char_at cursor 1 = '\n' + | _ when cursor.pos + 1 = cursor.right_boundary -> true + | _ when char_at cursor 1 = '[' -> true + | _ when char_at cursor 1 = '*' -> true + | _ when char_at cursor 1 = '_' -> true + | _ when char_at cursor 1 = '/' -> true + | _ when char_at cursor 1 = '`' -> true + | _ when char_at cursor 1 = '<' -> true + | _ -> false + let at = at s e + let parse cur acc = F.sentence_segment (segment_string cur) acc +end + +module Sentence (F : Fn) = struct + let s _cursor = printable + let e cursor = function + | '.' -> char_at cursor 1 = ' ' (* todo punctuations *) + | '\n' -> char_at cursor 1 = '\n' + | _ -> false + let at = at s e + let subsyntaxes = [| |] + let parse cur acc = F.sentence_s acc |> parse subsyntaxes cur |> F.sentence_e +end diff --git a/branches/origin-master/parsers/uri.ml b/branches/origin-master/parsers/uri.ml new file mode 100644 index 0000000..5ed6cc0 --- /dev/null +++ b/branches/origin-master/parsers/uri.ml @@ -0,0 +1,27 @@ +module type Fn = sig + type t + val angled_uri: string -> t -> t +end + +open Text_parse.Syntax +open Text_parse.Cursor + +module Angled (F : Fn) = struct + type t = F.t + let s cur = function '<' -> let c = char_at cur 1 in letter c || digit c | _ -> false + let e _cur = function '>' -> true | _ -> false + let parse cur acc = F.angled_uri (segment_string (unwrap 1 cur)) acc +end + +(* module Uri (F : TextFn) = struct + * type t = F.t + * let rec is_scheme cur = function + * | ':' -> true + * | ch when letter ch -> is_scheme (next_char cur) (char_at cur 1) + * | _ -> false + * let s cur ch = letter ch && is_scheme (next_char cur) (char_at cur 1) + * let e cur _ch = match char_at cur 1 with '\n' | ' ' -> true | _ -> false + * let at = at s e + * let parse cur acc = F.angled_uri (segment_string cur) acc + * end *) + diff --git a/branches/origin-master/syntax.ml b/branches/origin-master/syntax.ml new file mode 100644 index 0000000..cf059a4 --- /dev/null +++ b/branches/origin-master/syntax.ml @@ -0,0 +1,11 @@ +module type S = sig + val s: Cursor.t -> char -> bool + val e: Cursor.t -> char -> bool +end + +(*let str c = String.make 1 c*) + +let newline = function '\n' -> true | _ -> false +let printable ch = ch >= ' ' && ch <= '~' +let letter ch = (ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z') +let digit = function '0' .. '9' -> true | _ -> false diff --git a/branches/origin-master/text_parse.opam b/branches/origin-master/text_parse.opam new file mode 100644 index 0000000..382c6e2 --- /dev/null +++ b/branches/origin-master/text_parse.opam @@ -0,0 +1,23 @@ +# This file is generated by dune, edit dune-project instead +opam-version: "2.0" +version: "1.02" +synopsis: "Applicative text parsing library for OCaml" +maintainer: ["orbifx "] +license: "EUPL-1.2" +depends: [ + "dune" {>= "2.4"} +] +build: [ + ["dune" "subst"] {pinned} + [ + "dune" + "build" + "-p" + name + "-j" + jobs + "@install" + "@runtest" {with-test} + "@doc" {with-doc} + ] +] diff --git a/branches/origin/.gitignore b/branches/origin/.gitignore new file mode 100644 index 0000000..b6116b1 --- /dev/null +++ b/branches/origin/.gitignore @@ -0,0 +1,3 @@ +_build +*.txt +*.merlin diff --git a/branches/origin/LICENSE b/branches/origin/LICENSE new file mode 100644 index 0000000..fa3348e --- /dev/null +++ b/branches/origin/LICENSE @@ -0,0 +1,153 @@ +EUROPEAN UNION PUBLIC LICENCE v. 1.2 +EUPL © the European Union 2007, 2016 + +This European Union Public Licence (the ‘EUPL’) applies to the Work (as defined below) which is provided under the terms of this Licence. Any use of the Work, other than as authorised under this Licence is prohibited (to the extent such use is covered by a right of the copyright holder of the Work). + +The Work is provided under the terms of this Licence when the Licensor (as defined below) has placed the following notice immediately following the copyright notice for the Work: + +Licensed under the EUPL + +or has expressed by any other means his willingness to license under the EUPL. + +1. Definitions +In this Licence, the following terms have the following meaning: + +— ‘The Licence’: this Licence. + +— ‘The Original Work’: the work or software distributed or communicated by the Licensor under this Licence, available as Source Code and also as Executable Code as the case may be. + +— ‘Derivative Works’: the works or software that could be created by the Licensee, based upon the Original Work or modifications thereof. This Licence does not define the extent of modification or dependence on the Original Work required in order to classify a work as a Derivative Work; this extent is determined by copyright law applicable in the country mentioned in Article 15. + +— ‘The Work’: the Original Work or its Derivative Works. + +— ‘The Source Code’: the human-readable form of the Work which is the most convenient for people to study and modify. + +— ‘The Executable Code’: any code which has generally been compiled and which is meant to be interpreted by a computer as a program. + +— ‘The Licensor’: the natural or legal person that distributes or communicates the Work under the Licence. + +— ‘Contributor(s)’: any natural or legal person who modifies the Work under the Licence, or otherwise contributes to the creation of a Derivative Work. + +— ‘The Licensee’ or ‘You’: any natural or legal person who makes any usage of the Work under the terms of the Licence. + +— ‘Distribution’ or ‘Communication’: any act of selling, giving, lending, renting, distributing, communicating, transmitting, or otherwise making available, online or offline, copies of the Work or providing access to its essential functionalities at the disposal of any other natural or legal person. + +2. Scope of the rights granted by the Licence +The Licensor hereby grants You a worldwide, royalty-free, non-exclusive, sublicensable licence to do the following, for the duration of copyright vested in the Original Work: + +— use the Work in any circumstance and for all usage, + +— reproduce the Work, + +— modify the Work, and make Derivative Works based upon the Work, + +— communicate to the public, including the right to make available or display the Work or copies thereof to the public and perform publicly, as the case may be, the Work, + +— distribute the Work or copies thereof, + +— lend and rent the Work or copies thereof, + +— sublicense rights in the Work or copies thereof. + +Those rights can be exercised on any media, supports and formats, whether now known or later invented, as far as the applicable law permits so. + +In the countries where moral rights apply, the Licensor waives his right to exercise his moral right to the extent allowed by law in order to make effective the licence of the economic rights here above listed. + +The Licensor grants to the Licensee royalty-free, non-exclusive usage rights to any patents held by the Licensor, to the extent necessary to make use of the rights granted on the Work under this Licence. + +3. Communication of the Source Code +The Licensor may provide the Work either in its Source Code form, or as Executable Code. If the Work is provided as Executable Code, the Licensor provides in addition a machine-readable copy of the Source Code of the Work along with each copy of the Work that the Licensor distributes or indicates, in a notice following the copyright notice attached to the Work, a repository where the Source Code is easily and freely accessible for as long as the Licensor continues to distribute or communicate the Work. + +4. Limitations on copyright +Nothing in this Licence is intended to deprive the Licensee of the benefits from any exception or limitation to the exclusive rights of the rights owners in the Work, of the exhaustion of those rights or of other applicable limitations thereto. + +5. Obligations of the Licensee +The grant of the rights mentioned above is subject to some restrictions and obligations imposed on the Licensee. Those obligations are the following: + +Attribution right: The Licensee shall keep intact all copyright, patent or trademarks notices and all notices that refer to the Licence and to the disclaimer of warranties. The Licensee must include a copy of such notices and a copy of the Licence with every copy of the Work he/she distributes or communicates. The Licensee must cause any Derivative Work to carry prominent notices stating that the Work has been modified and the date of modification. +Copyleft clause: If the Licensee distributes or communicates copies of the Original Works or Derivative Works, this Distribution or Communication will be done under the terms of this Licence or of a later version of this Licence unless the Original Work is expressly distributed only under this version of the Licence — for example by communicating ‘EUPL v. 1.2 only’. The Licensee (becoming Licensor) cannot offer or impose any additional terms or conditions on the Work or Derivative Work that alter or restrict the terms of the Licence. +Compatibility clause: If the Licensee Distributes or Communicates Derivative Works or copies thereof based upon both the Work and another work licensed under a Compatible Licence, this Distribution or Communication can be done under the terms of this Compatible Licence. For the sake of this clause, ‘Compatible Licence’ refers to the licences listed in the appendix attached to this Licence. Should the Licensee's obligations under the Compatible Licence conflict with his/her obligations under this Licence, the obligations of the Compatible Licence shall prevail. +Provision of Source Code: When distributing or communicating copies of the Work, the Licensee will provide a machine-readable copy of the Source Code or indicate a repository where this Source will be easily and freely available for as long as the Licensee continues to distribute or communicate the Work. +Legal Protection: This Licence does not grant permission to use the trade names, trademarks, service marks, or names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the copyright notice. +6. Chain of Authorship +The original Licensor warrants that the copyright in the Original Work granted hereunder is owned by him/her or licensed to him/her and that he/she has the power and authority to grant the Licence. + +Each Contributor warrants that the copyright in the modifications he/she brings to the Work are owned by him/her or licensed to him/her and that he/she has the power and authority to grant the Licence. + +Each time You accept the Licence, the original Licensor and subsequent Contributors grant You a licence to their contri butions to the Work, under the terms of this Licence. + +7. Disclaimer of Warranty +The Work is a work in progress, which is continuously improved by numerous Contributors. It is not a finished work and may therefore contain defects or ‘bugs’ inherent to this type of development. + +For the above reason, the Work is provided under the Licence on an ‘as is’ basis and without warranties of any kind concerning the Work, including without limitation merchantability, fitness for a particular purpose, absence of defects or errors, accuracy, non-infringement of intellectual property rights other than copyright as stated in Article 6 of this Licence. + +This disclaimer of warranty is an essential part of the Licence and a condition for the grant of any rights to the Work. + +8. Disclaimer of Liability +Except in the cases of wilful misconduct or damages directly caused to natural persons, the Licensor will in no event be liable for any direct or indirect, material or moral, damages of any kind, arising out of the Licence or of the use of the Work, including without limitation, damages for loss of goodwill, work stoppage, computer failure or malfunction, loss of data or any commercial damage, even if the Licensor has been advised of the possibility of such damage. However, the Licensor will be liable under statutory product liability laws as far such laws apply to the Work. + +9. Additional agreements +While distributing the Work, You may choose to conclude an additional agreement, defining obligations or services consistent with this Licence. However, if accepting obligations, You may act only on your own behalf and on your sole responsibility, not on behalf of the original Licensor or any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against such Contributor by the fact You have accepted any warranty or additional liability. + +10. Acceptance of the Licence +The provisions of this Licence can be accepted by clicking on an icon ‘I agree’ placed under the bottom of a window displaying the text of this Licence or by affirming consent in any other similar way, in accordance with the rules of applicable law. Clicking on that icon indicates your clear and irrevocable acceptance of this Licence and all of its terms and conditions. + +Similarly, you irrevocably accept this Licence and all of its terms and conditions by exercising any rights granted to You by Article 2 of this Licence, such as the use of the Work, the creation by You of a Derivative Work or the Distribution or Communication by You of the Work or copies thereof. + +11. Information to the public +In case of any Distribution or Communication of the Work by means of electronic communication by You (for example, by offering to download the Work from a remote location) the distribution channel or media (for example, a website) must at least provide to the public the information requested by the applicable law regarding the Licensor, the Licence and the way it may be accessible, concluded, stored and reproduced by the Licensee. + +12. Termination of the Licence +The Licence and the rights granted hereunder will terminate automatically upon any breach by the Licensee of the terms of the Licence. + +Such a termination will not terminate the licences of any person who has received the Work from the Licensee under the Licence, provided such persons remain in full compliance with the Licence. + +13. Miscellaneous +Without prejudice of Article 9 above, the Licence represents the complete agreement between the Parties as to the Work. + +If any provision of the Licence is invalid or unenforceable under applicable law, this will not affect the validity or enforceability of the Licence as a whole. Such provision will be construed or reformed so as necessary to make it valid and enforceable. + +The European Commission may publish other linguistic versions or new versions of this Licence or updated versions of the Appendix, so far this is required and reasonable, without reducing the scope of the rights granted by the Licence. New versions of the Licence will be published with a unique version number. + +All linguistic versions of this Licence, approved by the European Commission, have identical value. Parties can take advantage of the linguistic version of their choice. + +14. Jurisdiction +Without prejudice to specific agreement between parties, + +— any litigation resulting from the interpretation of this License, arising between the European Union institutions, bodies, offices or agencies, as a Licensor, and any Licensee, will be subject to the jurisdiction of the Court of Justice of the European Union, as laid down in article 272 of the Treaty on the Functioning of the European Union, + +— any litigation arising between other parties and resulting from the interpretation of this License, will be subject to the exclusive jurisdiction of the competent court where the Licensor resides or conducts its primary business. + +15. Applicable Law +Without prejudice to specific agreement between parties, + +— this Licence shall be governed by the law of the European Union Member State where the Licensor has his seat, resides or has his registered office, + +— this licence shall be governed by Belgian law if the Licensor has no seat, residence or registered office inside a European Union Member State. + +Appendix +‘Compatible Licences’ according to Article 5 EUPL are: + +— GNU General Public License (GPL) v. 2, v. 3 + +— GNU Affero General Public License (AGPL) v. 3 + +— Open Software License (OSL) v. 2.1, v. 3.0 + +— Eclipse Public License (EPL) v. 1.0 + +— CeCILL v. 2.0, v. 2.1 + +— Mozilla Public Licence (MPL) v. 2 + +— GNU Lesser General Public Licence (LGPL) v. 2.1, v. 3 + +— Creative Commons Attribution-ShareAlike v. 3.0 Unported (CC BY-SA 3.0) for works other than software + +— European Union Public Licence (EUPL) v. 1.1, v. 1.2 + +— Québec Free and Open-Source Licence — Reciprocity (LiLiQ-R) or Strong Reciprocity (LiLiQ-R+) + +The European Commission may update this Appendix to later versions of the above licences without producing a new version of the EUPL, as long as they provide the rights granted in Article 2 of this Licence and protect the covered Source Code from exclusive appropriation. + +All other changes or additions to this Appendix require the production of a new EUPL version. diff --git a/branches/origin/bin/cli.ml b/branches/origin/bin/cli.ml new file mode 100644 index 0000000..653d202 --- /dev/null +++ b/branches/origin/bin/cli.ml @@ -0,0 +1,23 @@ +module Test = struct + type t = unit + let blank_line () = print_string "{bl}" + let angled_uri s () = print_string ("{>}" ^ s ^ "{<}") + let plain_text s () = print_string s + let heading_hashbang i s () = print_string (string_of_int i ^ s) + let paragraph_s () = print_string "{p>}" + let paragraph_e () = print_string "{ " ^ u +let bold t a = a ^ "*" ^ t ^ "*" +let italic t a = a ^ "/" ^ t ^ "/" +let underline t a = a ^ "_" ^ t ^ "_" +let inline_monospace t a = a ^ "`" ^ t ^ "`" +let heading_hashbang lvl h a = a ^ String.make lvl '#' ^ h ^ "\n" +let paragraph_s a = a +let paragraph_e a = a +let preformatted s a = a ^ "
    " ^ s ^ "
    " +let bullet_list_s a = a +let bullet_list_e a = a +let bullet_item_s ch a = a ^ Char.escaped ch +let bullet_item_e a = a ^ "\n" +let ordered_list_s a = a +let ordered_list_e a = a +let ordered_item_s = bullet_item_s +let ordered_item_e = bullet_item_e +let key_value_pair k v a = prerr_endline @@ k ^ "~" ^ v; a + diff --git a/branches/origin/converters/html.ml b/branches/origin/converters/html.ml new file mode 100644 index 0000000..4a67e86 --- /dev/null +++ b/branches/origin/converters/html.ml @@ -0,0 +1,39 @@ +let esc x = + let fn a c = match c with + | '&' -> a ^ "&" + | '<' -> a ^ "<" + | '"' -> a ^ """ + | '\''-> a ^ "'" + | x -> a ^ String.make 1 x + in + Seq.fold_left fn "" (String.to_seq x) + +type t = string +let blank_line a = a ^ "" +let plain_text s a = a ^ esc s +let sentence_s a = a ^ "" +let sentence_e a = a ^ " " +let sentence_segment s a = a ^ esc s ^ " " +let reference_name n a = a ^ {||} ^ esc n ^ "" +let bracketed_referent_s n a = a ^ {||} ^ esc n ^ ": " +let bracketed_referent_e a = a ^ "
    " +let angled_uri u a = a ^ {|<|} ^ esc u ^ {|>|} +let bold t a = a ^ "" ^ esc t ^ "" +let italic t a = a ^ "" ^ esc t ^ "" +let underline t a = a ^ "" ^ esc t ^ "" +let inline_monospace t a = a ^ "" ^ esc t ^ "" +let heading_hashbang lvl h a = + let lvl = string_of_int lvl in + a ^ "" ^ esc h ^ "" +let paragraph_s a = a ^ "

    " +let paragraph_e a = a ^ "

    " +let preformatted s a = a ^ "
    " ^ esc s ^ "
    " +let bullet_list_s a = a ^ "
      " +let bullet_list_e a = a ^ "
    " +let bullet_item_s _ch a = a ^ "
  • " +let bullet_item_e a = a ^ "
  • " +let ordered_list_s a = a ^ "
      " +let ordered_list_e a = a ^ "
    " +let ordered_item_s = bullet_item_s +let ordered_item_e = bullet_item_e +let key_value k v a = prerr_endline @@ k ^ "~" ^ v; a diff --git a/branches/origin/cursor.ml b/branches/origin/cursor.ml new file mode 100644 index 0000000..aaa8b5d --- /dev/null +++ b/branches/origin/cursor.ml @@ -0,0 +1,22 @@ +type t = { text : string; pos : int; right_boundary : int } + +let overran cursor = cursor.pos >= cursor.right_boundary +let next_char cursor = { cursor with pos = cursor.pos + 1 } +let char_at cur offset = String.get cur.text (cur.pos + offset) +let char cur = String.get cur.text cur.pos +let distance a b = b.pos - a.pos + +let sub ?left ?right cur = { cur with + pos = Option.value left ~default:cur.pos; + right_boundary = Option.value right ~default:cur.right_boundary } + +let unwrap num cur = sub ~left:(cur.pos+num) ~right:(cur.right_boundary-num) cur + +let segment_string cur = String.sub cur.text cur.pos (cur.right_boundary - cur.pos) + +(*todo: reconsider +1 result and type cursor*) +let rec find_end e = function + | cur when cur.pos + 1 = String.length cur.text -> Some cur.pos + | cur when overran cur -> None + | cur when e cur (char cur) -> Some (cur.pos + 1) + | cur -> find_end e (next_char cur) diff --git a/branches/origin/dune b/branches/origin/dune new file mode 100644 index 0000000..889e5f1 --- /dev/null +++ b/branches/origin/dune @@ -0,0 +1,4 @@ +(library + (name text_parse) + (public_name text_parse) + (modules parser syntax cursor)) diff --git a/branches/origin/dune-project b/branches/origin/dune-project new file mode 100644 index 0000000..8138c5d --- /dev/null +++ b/branches/origin/dune-project @@ -0,0 +1,12 @@ +(lang dune 2.4) +(name text_parse) +(version 1.02) + +(license EUPL-1.2) +(maintainers "orbifx ") + +(generate_opam_files true) + +(package + (name text_parse) + (synopsis "Applicative text parsing library for OCaml")) diff --git a/branches/origin/parser.ml b/branches/origin/parser.ml new file mode 100644 index 0000000..2124048 --- /dev/null +++ b/branches/origin/parser.ml @@ -0,0 +1,37 @@ +module type S = sig + include Syntax.S + type t + val parse: Cursor.t -> t -> t +end + +module type Sub_parsers = sig + type t + val subparsers: (module S with type t = t) array +end + +let at s e cur ch = if s cur ch then Cursor.find_end e cur else None + +let apply_default (type a) (module P: S with type t = a) (acc: a) cursor_default cursor = + if cursor_default = cursor then acc + else P.parse (Cursor.sub ~right:(cursor.Cursor.pos) cursor_default) acc + +let rec branch: type a. ?idx:int -> a -> Cursor.t -> Cursor.t -> (module S with type t = a) array -> (a * Cursor.t) = +fun ?idx:(i=1) acc cursor_default cursor syntaxes -> + if Cursor.overran cursor then (apply_default syntaxes.(0) acc cursor_default cursor), cursor + else + try let (module P: S with type t = a) = syntaxes.(i) in + (match at P.s P.e cursor (Cursor.char cursor) with + | Some right -> + let acc = apply_default syntaxes.(0) acc cursor_default cursor in + let acc = P.parse (Cursor.sub ~right cursor) acc in + let cursor = Cursor.sub ~left:right cursor in + branch acc cursor cursor syntaxes + | None | exception Invalid_argument _ -> branch ~idx:(i+1) acc cursor_default cursor syntaxes) + with Invalid_argument _ -> + branch acc cursor_default (Cursor.next_char cursor) syntaxes + +let rec parse subsyntaxes cursor acc = + if Cursor.overran cursor then acc + else + let acc, cursor = branch acc cursor cursor subsyntaxes in + parse subsyntaxes cursor acc diff --git a/branches/origin/parsers/blank_line.ml b/branches/origin/parsers/blank_line.ml new file mode 100644 index 0000000..6d02f3e --- /dev/null +++ b/branches/origin/parsers/blank_line.ml @@ -0,0 +1,11 @@ +module type Fn = sig + type t + val blank_line: t -> t +end + +module Make (F : Fn) = struct + type t = F.t + let s _cur = function '\n' -> true | _ -> false + let e _cur = function '\n' -> true | _ -> false + let parse _cursor acc = F.blank_line acc +end diff --git a/branches/origin/parsers/bullet.ml b/branches/origin/parsers/bullet.ml new file mode 100644 index 0000000..b6c6f1c --- /dev/null +++ b/branches/origin/parsers/bullet.ml @@ -0,0 +1,31 @@ +module type Fn = sig + type t + val bullet_item_s: char -> 'a -> 'a + val bullet_item_e: 'a -> 'a + val bullet_list_s: 'a -> 'a + val bullet_list_e: 'a -> 'a +end + +open Text_parse.Parser +open Text_parse.Syntax +open Text_parse.Cursor + +module Item (F : Fn) = struct + type t = F.t + let s _cursor = function '-' | '+' | '*' -> true | _ -> false + let e cursor _ch = newline (char_at cursor 1) + let subsyntaxes = [||] + let parse cur acc = + let bullet_char = char cur in + let left = match find_end (fun _cur c -> c <> ' ') { cur with pos = cur.pos + 1 } + with Some x -> x-1 | None -> 0 in + F.bullet_item_s bullet_char acc |> parse subsyntaxes (sub ~left cur) |> F.bullet_item_e +end + +module List (F : Fn) = struct + type t = F.t + let s _cursor = function '-' | '+' | '*' -> true | _ -> false + let e cursor _ch = newline (char_at cursor 1) && not (s cursor (char_at cursor 2)) + let subsyntaxes = [| (module Item (F) : Text_parse.Parser.S with type t = F.t) |] + let parse cur acc = F.bullet_list_s acc |> parse subsyntaxes cur |> F.bullet_list_e +end diff --git a/branches/origin/parsers/dune b/branches/origin/parsers/dune new file mode 100644 index 0000000..ae648ff --- /dev/null +++ b/branches/origin/parsers/dune @@ -0,0 +1,4 @@ +(library + (name parsers) + (public_name text_parse.parsers) + (libraries text_parse)) diff --git a/branches/origin/parsers/emphasis.ml b/branches/origin/parsers/emphasis.ml new file mode 100644 index 0000000..1486c5e --- /dev/null +++ b/branches/origin/parsers/emphasis.ml @@ -0,0 +1,32 @@ +module type Fn = sig + val bold: string -> 'a -> 'a + val italic: string -> 'a -> 'a + val underline: string -> 'a -> 'a + val inline_monospace: string -> 'a -> 'a +end + +open Text_parse.Cursor + +module Bold (F : Fn) = struct + let s _cursor = function '*' -> true | _ -> false + let e = s + let parse cur acc = F.bold (segment_string (unwrap 1 cur)) acc +end + +module Italic (F : Fn) = struct + let s _cursor = function '/' -> true | _ -> false + let e = s + let parse cur acc = F.italic (segment_string (unwrap 1 cur)) acc +end + +module Underline (F : Fn) = struct + let s _cursor = function '_' -> true | _ -> false + let e = s + let parse cur acc = F.underline (segment_string (unwrap 1 cur)) acc +end + +module Inline_monospace (F : Fn) = struct + let s _cursor = function '`' -> true | _ -> false + let e = s + let parse cur acc = F.inline_monospace (segment_string (unwrap 1 cur)) acc +end diff --git a/branches/origin/parsers/heading.ml b/branches/origin/parsers/heading.ml new file mode 100644 index 0000000..8827d7b --- /dev/null +++ b/branches/origin/parsers/heading.ml @@ -0,0 +1,17 @@ +module type Fn = sig + type t + val heading_hashbang: int -> string -> t -> t +end + +open Text_parse.Syntax +open Text_parse.Cursor + +module Hashbang (F : Fn) = struct + type t = F.t + let s _cur = function '#' -> true | _ -> false + let e _cur = newline + let parse cursor acc = + let level = match find_end (fun _cur c -> c <> '#') cursor with + Some x -> x - cursor.pos - 1 | None -> 0 in + F.heading_hashbang level (segment_string { cursor with pos = cursor.pos + level + 1; right_boundary = cursor.right_boundary-1 }) acc +end diff --git a/branches/origin/parsers/key_value.ml b/branches/origin/parsers/key_value.ml new file mode 100644 index 0000000..98d9a87 --- /dev/null +++ b/branches/origin/parsers/key_value.ml @@ -0,0 +1,19 @@ +module type Fn = sig + type t + val key_value: string -> string -> t -> t +end + +open Text_parse.Syntax +open Text_parse.Cursor + +module Make (F : Fn) = struct + type t = F.t + let s _cur c = letter c + let e _cur c = newline c + let parse cursor acc = + let colon_pos = match find_end (fun _cur c -> c = ':') cursor with + Some x -> x - cursor.pos - 1 | None -> 0 in (*todo:None shouldn't be allowed by scope*) + let key = segment_string { cursor with right_boundary = cursor.pos+colon_pos } in + let value = segment_string { cursor with pos = cursor.pos+colon_pos+1; right_boundary = cursor.right_boundary } in + F.key_value key value acc +end diff --git a/branches/origin/parsers/markdown.ml b/branches/origin/parsers/markdown.ml new file mode 100644 index 0000000..4af084b --- /dev/null +++ b/branches/origin/parsers/markdown.ml @@ -0,0 +1,27 @@ +module type Markdown_t = sig + include Blank_line.Fn + include Reference.Fn with type t := t + include Bullet.Fn with type t := t + include Ordered.Fn with type t := t + include Heading.Fn with type t := t + include Preformatted.Fn with type t := t + include Paragraph.Fn with type t := t +end + +open Text_parse.Parser +open Text_parse.Cursor + +module Make (F : Markdown_t) = struct + let subsyntaxes = [| + (module Blank_line.Make (F) : Text_parse.Parser.S with type t = F.t); + (module Heading.Hashbang (F)); + (module Reference.Referent (F)); + (module Bullet.List (F)); + (module Ordered.List (F)); + (module Preformatted.Tabbed (F)); + (*(module Paragraph.Make (F));*) + |] + + let of_string text acc = + parse subsyntaxes { text; pos = 0; right_boundary = String.length text - 1 } acc +end diff --git a/branches/origin/parsers/ordered.ml b/branches/origin/parsers/ordered.ml new file mode 100644 index 0000000..7d2c32e --- /dev/null +++ b/branches/origin/parsers/ordered.ml @@ -0,0 +1,37 @@ +module type Fn = sig + type t + val ordered_item_s: char -> 'a -> 'a + val ordered_item_e: 'a -> 'a + val ordered_list_s: 'a -> 'a + val ordered_list_e: 'a -> 'a +end + +open Text_parse.Parser +open Text_parse.Syntax +open Text_parse.Cursor + +module Item (F : Fn) = struct + type t = F.t + let s cur ch = + let is_enum c = (c >= '0' && c <= '9') || (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') in + let is_delim c = c = '.' || c = ')' in + is_enum ch && is_delim (char_at cur 1) + let e cursor _ch = newline (char_at cursor 1) + let subsyntaxes = [||] + let parse cur acc = + let bullet_char = char cur in + let left = match find_end (fun _cur c -> c <> ' ') { cur with pos = cur.pos + 1 } + with Some x -> x | None -> 0 in + F.ordered_item_s bullet_char acc |> parse subsyntaxes (sub ~left cur) |> F.ordered_item_e +end + +module List (F: Fn) = struct + type t = F.t + let s cur ch = + let is_enum c = (c >= '0' && c <= '9') || (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') in + let is_delim c = c = '.' || c = ')' in (*todo: can't have `.` if sentence ends with it and lists are in sense*) + is_enum ch && is_delim (char_at cur 1) + let e cursor _ch = newline (char_at cursor 1) && newline (char_at cursor 2)(* not (s {cursor with pos = cursor.pos+2} (char_at cursor 2)) *) + let subsyntaxes = [| (module Item (F) : Text_parse.Parser.S with type t = F.t) |] + let parse cur acc = F.ordered_list_s acc |> parse subsyntaxes cur |> F.ordered_list_e +end diff --git a/branches/origin/parsers/paragraph.ml b/branches/origin/parsers/paragraph.ml new file mode 100644 index 0000000..216ee5e --- /dev/null +++ b/branches/origin/parsers/paragraph.ml @@ -0,0 +1,19 @@ +module type Fn = sig + type t + val paragraph_s: t -> t + val paragraph_e: t -> t +end + +open Text_parse.Parser +open Text_parse.Syntax +open Text_parse.Cursor + +module Make (F : Fn)(S : Text_parse.Parser.Sub_parsers with type t = F.t) = struct + type t = F.t + let s _cur ch = printable ch + let e cur = function + | '\n' -> char_at cur (-1) = '\n' + | _ when cur.pos + 1 = cur.right_boundary -> true + | _ -> false + let parse cur acc = F.paragraph_s acc |> parse S.subparsers cur |> F.paragraph_e +end diff --git a/branches/origin/parsers/plain_text.ml b/branches/origin/parsers/plain_text.ml new file mode 100644 index 0000000..59496ea --- /dev/null +++ b/branches/origin/parsers/plain_text.ml @@ -0,0 +1,36 @@ +module type Fn = sig + type t + val plain_text: string -> t -> t +end + +open Text_parse.Parser +open Text_parse.Cursor + +module Plain_text (F : Fn) = struct + type t = F.t + let s _cursor _ch = true + let e cursor = function + | '\n' -> char_at cursor (-1) = '\n' + | _ when cursor.pos + 1 = cursor.right_boundary -> true + | _ -> false + let parse cur acc = F.plain_text (segment_string cur) acc +end + +module type Plain_text_t = sig + include Blank_line.Fn + include Heading.Fn with type t := t + include Uri.Fn with type t := t + include Paragraph.Fn with type t := t + include Fn with type t := t +end + + +module Make (F : Plain_text_t) = struct + module P = struct + type t = F.t + let subparsers = [| (module Plain_text (F) : Text_parse.Parser.S with type t = F.t); (module Uri.Angled (F)) |] + end + + let subparsers = [| (module Paragraph.Make (F)(P) : Text_parse.Parser.S with type t = F.t); (module Blank_line.Make (F)); (module Heading.Hashbang (F)); (module Paragraph.Make (F)(P)); |] + let of_string text acc = parse subparsers { text; pos = 0; right_boundary = String.length text - 1 } acc +end diff --git a/branches/origin/parsers/preformatted.ml b/branches/origin/parsers/preformatted.ml new file mode 100644 index 0000000..80bb795 --- /dev/null +++ b/branches/origin/parsers/preformatted.ml @@ -0,0 +1,13 @@ +module type Fn = sig + type t + val tab_preformatted: string -> t -> t +end + +open Text_parse.Cursor + +module Tabbed (F : Fn) = struct + type t = F.t + let s _cur ch = '\t' = ch + let e cur = function '\n' -> not (char_at cur 1 = '\t') | _ -> false + let parse cur acc = F.tab_preformatted (segment_string cur) acc +end diff --git a/branches/origin/parsers/reference.ml b/branches/origin/parsers/reference.ml new file mode 100644 index 0000000..cf45ced --- /dev/null +++ b/branches/origin/parsers/reference.ml @@ -0,0 +1,31 @@ +module type Fn = sig + type t + val reference_name: string -> string -> t -> t + val referent_s: string -> t -> t + val referent_e: t -> t +end + +open Text_parse.Parser +open Text_parse.Cursor +open Text_parse.Syntax + +module Name (F : Fn) = struct + type t = F.t + let s _cursor = function '[' -> true | _ -> false + let e _cursor = function ']' -> true | _ -> false + let parse cur acc = F.reference_name (segment_string (unwrap 1 cur)) acc +end + +module Referent (F : Fn) = struct + type t = F.t + let find_name_end = find_end (fun cur c -> c = ']' && (char_at cur 1) = ':') + let s cur = function '[' -> Option.is_some (find_name_end cur) | _ -> false + let e _cur = newline + let subsyntaxes = [| |] + let parse cur acc = + let name_boundary = match find_name_end cur with Some x -> x | None -> 0 in + let name = segment_string { cur with pos = cur.pos+1; right_boundary = name_boundary-1 } in + let text_cur = { cur with pos = name_boundary+2 } in + F.referent_s name acc |> parse subsyntaxes text_cur |> F.referent_e +end + diff --git a/branches/origin/parsers/sentence.ml b/branches/origin/parsers/sentence.ml new file mode 100644 index 0000000..cb006ac --- /dev/null +++ b/branches/origin/parsers/sentence.ml @@ -0,0 +1,37 @@ +module type Fn = sig + val sentence_segment: string -> 'a -> 'a + val sentence_s: 'a -> 'a + val sentence_e: 'a -> 'a +end + +open Text_parse.Parser +open Text_parse.Syntax +open Text_parse.Cursor + +module Sentence_segment (F : Fn) = struct + let s _cursor = printable + let e cursor = function + | '.' -> char_at cursor 1 = ' ' || newline (char_at cursor 1) (* todo punctuations *) + | '\n' -> char_at cursor 1 = '\n' + | _ when cursor.pos + 1 = cursor.right_boundary -> true + | _ when char_at cursor 1 = '[' -> true + | _ when char_at cursor 1 = '*' -> true + | _ when char_at cursor 1 = '_' -> true + | _ when char_at cursor 1 = '/' -> true + | _ when char_at cursor 1 = '`' -> true + | _ when char_at cursor 1 = '<' -> true + | _ -> false + let at = at s e + let parse cur acc = F.sentence_segment (segment_string cur) acc +end + +module Sentence (F : Fn) = struct + let s _cursor = printable + let e cursor = function + | '.' -> char_at cursor 1 = ' ' (* todo punctuations *) + | '\n' -> char_at cursor 1 = '\n' + | _ -> false + let at = at s e + let subsyntaxes = [| |] + let parse cur acc = F.sentence_s acc |> parse subsyntaxes cur |> F.sentence_e +end diff --git a/branches/origin/parsers/uri.ml b/branches/origin/parsers/uri.ml new file mode 100644 index 0000000..5ed6cc0 --- /dev/null +++ b/branches/origin/parsers/uri.ml @@ -0,0 +1,27 @@ +module type Fn = sig + type t + val angled_uri: string -> t -> t +end + +open Text_parse.Syntax +open Text_parse.Cursor + +module Angled (F : Fn) = struct + type t = F.t + let s cur = function '<' -> let c = char_at cur 1 in letter c || digit c | _ -> false + let e _cur = function '>' -> true | _ -> false + let parse cur acc = F.angled_uri (segment_string (unwrap 1 cur)) acc +end + +(* module Uri (F : TextFn) = struct + * type t = F.t + * let rec is_scheme cur = function + * | ':' -> true + * | ch when letter ch -> is_scheme (next_char cur) (char_at cur 1) + * | _ -> false + * let s cur ch = letter ch && is_scheme (next_char cur) (char_at cur 1) + * let e cur _ch = match char_at cur 1 with '\n' | ' ' -> true | _ -> false + * let at = at s e + * let parse cur acc = F.angled_uri (segment_string cur) acc + * end *) + diff --git a/branches/origin/syntax.ml b/branches/origin/syntax.ml new file mode 100644 index 0000000..cf059a4 --- /dev/null +++ b/branches/origin/syntax.ml @@ -0,0 +1,11 @@ +module type S = sig + val s: Cursor.t -> char -> bool + val e: Cursor.t -> char -> bool +end + +(*let str c = String.make 1 c*) + +let newline = function '\n' -> true | _ -> false +let printable ch = ch >= ' ' && ch <= '~' +let letter ch = (ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z') +let digit = function '0' .. '9' -> true | _ -> false diff --git a/branches/origin/text_parse.opam b/branches/origin/text_parse.opam new file mode 100644 index 0000000..382c6e2 --- /dev/null +++ b/branches/origin/text_parse.opam @@ -0,0 +1,23 @@ +# This file is generated by dune, edit dune-project instead +opam-version: "2.0" +version: "1.02" +synopsis: "Applicative text parsing library for OCaml" +maintainer: ["orbifx "] +license: "EUPL-1.2" +depends: [ + "dune" {>= "2.4"} +] +build: [ + ["dune" "subst"] {pinned} + [ + "dune" + "build" + "-p" + name + "-j" + jobs + "@install" + "@runtest" {with-test} + "@doc" {with-doc} + ] +] diff --git a/trunk/.gitignore b/trunk/.gitignore new file mode 100644 index 0000000..b6116b1 --- /dev/null +++ b/trunk/.gitignore @@ -0,0 +1,3 @@ +_build +*.txt +*.merlin diff --git a/trunk/LICENSE b/trunk/LICENSE new file mode 100644 index 0000000..fa3348e --- /dev/null +++ b/trunk/LICENSE @@ -0,0 +1,153 @@ +EUROPEAN UNION PUBLIC LICENCE v. 1.2 +EUPL © the European Union 2007, 2016 + +This European Union Public Licence (the ‘EUPL’) applies to the Work (as defined below) which is provided under the terms of this Licence. Any use of the Work, other than as authorised under this Licence is prohibited (to the extent such use is covered by a right of the copyright holder of the Work). + +The Work is provided under the terms of this Licence when the Licensor (as defined below) has placed the following notice immediately following the copyright notice for the Work: + +Licensed under the EUPL + +or has expressed by any other means his willingness to license under the EUPL. + +1. Definitions +In this Licence, the following terms have the following meaning: + +— ‘The Licence’: this Licence. + +— ‘The Original Work’: the work or software distributed or communicated by the Licensor under this Licence, available as Source Code and also as Executable Code as the case may be. + +— ‘Derivative Works’: the works or software that could be created by the Licensee, based upon the Original Work or modifications thereof. This Licence does not define the extent of modification or dependence on the Original Work required in order to classify a work as a Derivative Work; this extent is determined by copyright law applicable in the country mentioned in Article 15. + +— ‘The Work’: the Original Work or its Derivative Works. + +— ‘The Source Code’: the human-readable form of the Work which is the most convenient for people to study and modify. + +— ‘The Executable Code’: any code which has generally been compiled and which is meant to be interpreted by a computer as a program. + +— ‘The Licensor’: the natural or legal person that distributes or communicates the Work under the Licence. + +— ‘Contributor(s)’: any natural or legal person who modifies the Work under the Licence, or otherwise contributes to the creation of a Derivative Work. + +— ‘The Licensee’ or ‘You’: any natural or legal person who makes any usage of the Work under the terms of the Licence. + +— ‘Distribution’ or ‘Communication’: any act of selling, giving, lending, renting, distributing, communicating, transmitting, or otherwise making available, online or offline, copies of the Work or providing access to its essential functionalities at the disposal of any other natural or legal person. + +2. Scope of the rights granted by the Licence +The Licensor hereby grants You a worldwide, royalty-free, non-exclusive, sublicensable licence to do the following, for the duration of copyright vested in the Original Work: + +— use the Work in any circumstance and for all usage, + +— reproduce the Work, + +— modify the Work, and make Derivative Works based upon the Work, + +— communicate to the public, including the right to make available or display the Work or copies thereof to the public and perform publicly, as the case may be, the Work, + +— distribute the Work or copies thereof, + +— lend and rent the Work or copies thereof, + +— sublicense rights in the Work or copies thereof. + +Those rights can be exercised on any media, supports and formats, whether now known or later invented, as far as the applicable law permits so. + +In the countries where moral rights apply, the Licensor waives his right to exercise his moral right to the extent allowed by law in order to make effective the licence of the economic rights here above listed. + +The Licensor grants to the Licensee royalty-free, non-exclusive usage rights to any patents held by the Licensor, to the extent necessary to make use of the rights granted on the Work under this Licence. + +3. Communication of the Source Code +The Licensor may provide the Work either in its Source Code form, or as Executable Code. If the Work is provided as Executable Code, the Licensor provides in addition a machine-readable copy of the Source Code of the Work along with each copy of the Work that the Licensor distributes or indicates, in a notice following the copyright notice attached to the Work, a repository where the Source Code is easily and freely accessible for as long as the Licensor continues to distribute or communicate the Work. + +4. Limitations on copyright +Nothing in this Licence is intended to deprive the Licensee of the benefits from any exception or limitation to the exclusive rights of the rights owners in the Work, of the exhaustion of those rights or of other applicable limitations thereto. + +5. Obligations of the Licensee +The grant of the rights mentioned above is subject to some restrictions and obligations imposed on the Licensee. Those obligations are the following: + +Attribution right: The Licensee shall keep intact all copyright, patent or trademarks notices and all notices that refer to the Licence and to the disclaimer of warranties. The Licensee must include a copy of such notices and a copy of the Licence with every copy of the Work he/she distributes or communicates. The Licensee must cause any Derivative Work to carry prominent notices stating that the Work has been modified and the date of modification. +Copyleft clause: If the Licensee distributes or communicates copies of the Original Works or Derivative Works, this Distribution or Communication will be done under the terms of this Licence or of a later version of this Licence unless the Original Work is expressly distributed only under this version of the Licence — for example by communicating ‘EUPL v. 1.2 only’. The Licensee (becoming Licensor) cannot offer or impose any additional terms or conditions on the Work or Derivative Work that alter or restrict the terms of the Licence. +Compatibility clause: If the Licensee Distributes or Communicates Derivative Works or copies thereof based upon both the Work and another work licensed under a Compatible Licence, this Distribution or Communication can be done under the terms of this Compatible Licence. For the sake of this clause, ‘Compatible Licence’ refers to the licences listed in the appendix attached to this Licence. Should the Licensee's obligations under the Compatible Licence conflict with his/her obligations under this Licence, the obligations of the Compatible Licence shall prevail. +Provision of Source Code: When distributing or communicating copies of the Work, the Licensee will provide a machine-readable copy of the Source Code or indicate a repository where this Source will be easily and freely available for as long as the Licensee continues to distribute or communicate the Work. +Legal Protection: This Licence does not grant permission to use the trade names, trademarks, service marks, or names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the copyright notice. +6. Chain of Authorship +The original Licensor warrants that the copyright in the Original Work granted hereunder is owned by him/her or licensed to him/her and that he/she has the power and authority to grant the Licence. + +Each Contributor warrants that the copyright in the modifications he/she brings to the Work are owned by him/her or licensed to him/her and that he/she has the power and authority to grant the Licence. + +Each time You accept the Licence, the original Licensor and subsequent Contributors grant You a licence to their contri butions to the Work, under the terms of this Licence. + +7. Disclaimer of Warranty +The Work is a work in progress, which is continuously improved by numerous Contributors. It is not a finished work and may therefore contain defects or ‘bugs’ inherent to this type of development. + +For the above reason, the Work is provided under the Licence on an ‘as is’ basis and without warranties of any kind concerning the Work, including without limitation merchantability, fitness for a particular purpose, absence of defects or errors, accuracy, non-infringement of intellectual property rights other than copyright as stated in Article 6 of this Licence. + +This disclaimer of warranty is an essential part of the Licence and a condition for the grant of any rights to the Work. + +8. Disclaimer of Liability +Except in the cases of wilful misconduct or damages directly caused to natural persons, the Licensor will in no event be liable for any direct or indirect, material or moral, damages of any kind, arising out of the Licence or of the use of the Work, including without limitation, damages for loss of goodwill, work stoppage, computer failure or malfunction, loss of data or any commercial damage, even if the Licensor has been advised of the possibility of such damage. However, the Licensor will be liable under statutory product liability laws as far such laws apply to the Work. + +9. Additional agreements +While distributing the Work, You may choose to conclude an additional agreement, defining obligations or services consistent with this Licence. However, if accepting obligations, You may act only on your own behalf and on your sole responsibility, not on behalf of the original Licensor or any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against such Contributor by the fact You have accepted any warranty or additional liability. + +10. Acceptance of the Licence +The provisions of this Licence can be accepted by clicking on an icon ‘I agree’ placed under the bottom of a window displaying the text of this Licence or by affirming consent in any other similar way, in accordance with the rules of applicable law. Clicking on that icon indicates your clear and irrevocable acceptance of this Licence and all of its terms and conditions. + +Similarly, you irrevocably accept this Licence and all of its terms and conditions by exercising any rights granted to You by Article 2 of this Licence, such as the use of the Work, the creation by You of a Derivative Work or the Distribution or Communication by You of the Work or copies thereof. + +11. Information to the public +In case of any Distribution or Communication of the Work by means of electronic communication by You (for example, by offering to download the Work from a remote location) the distribution channel or media (for example, a website) must at least provide to the public the information requested by the applicable law regarding the Licensor, the Licence and the way it may be accessible, concluded, stored and reproduced by the Licensee. + +12. Termination of the Licence +The Licence and the rights granted hereunder will terminate automatically upon any breach by the Licensee of the terms of the Licence. + +Such a termination will not terminate the licences of any person who has received the Work from the Licensee under the Licence, provided such persons remain in full compliance with the Licence. + +13. Miscellaneous +Without prejudice of Article 9 above, the Licence represents the complete agreement between the Parties as to the Work. + +If any provision of the Licence is invalid or unenforceable under applicable law, this will not affect the validity or enforceability of the Licence as a whole. Such provision will be construed or reformed so as necessary to make it valid and enforceable. + +The European Commission may publish other linguistic versions or new versions of this Licence or updated versions of the Appendix, so far this is required and reasonable, without reducing the scope of the rights granted by the Licence. New versions of the Licence will be published with a unique version number. + +All linguistic versions of this Licence, approved by the European Commission, have identical value. Parties can take advantage of the linguistic version of their choice. + +14. Jurisdiction +Without prejudice to specific agreement between parties, + +— any litigation resulting from the interpretation of this License, arising between the European Union institutions, bodies, offices or agencies, as a Licensor, and any Licensee, will be subject to the jurisdiction of the Court of Justice of the European Union, as laid down in article 272 of the Treaty on the Functioning of the European Union, + +— any litigation arising between other parties and resulting from the interpretation of this License, will be subject to the exclusive jurisdiction of the competent court where the Licensor resides or conducts its primary business. + +15. Applicable Law +Without prejudice to specific agreement between parties, + +— this Licence shall be governed by the law of the European Union Member State where the Licensor has his seat, resides or has his registered office, + +— this licence shall be governed by Belgian law if the Licensor has no seat, residence or registered office inside a European Union Member State. + +Appendix +‘Compatible Licences’ according to Article 5 EUPL are: + +— GNU General Public License (GPL) v. 2, v. 3 + +— GNU Affero General Public License (AGPL) v. 3 + +— Open Software License (OSL) v. 2.1, v. 3.0 + +— Eclipse Public License (EPL) v. 1.0 + +— CeCILL v. 2.0, v. 2.1 + +— Mozilla Public Licence (MPL) v. 2 + +— GNU Lesser General Public Licence (LGPL) v. 2.1, v. 3 + +— Creative Commons Attribution-ShareAlike v. 3.0 Unported (CC BY-SA 3.0) for works other than software + +— European Union Public Licence (EUPL) v. 1.1, v. 1.2 + +— Québec Free and Open-Source Licence — Reciprocity (LiLiQ-R) or Strong Reciprocity (LiLiQ-R+) + +The European Commission may update this Appendix to later versions of the above licences without producing a new version of the EUPL, as long as they provide the rights granted in Article 2 of this Licence and protect the covered Source Code from exclusive appropriation. + +All other changes or additions to this Appendix require the production of a new EUPL version. diff --git a/trunk/bin/cli.ml b/trunk/bin/cli.ml new file mode 100644 index 0000000..653d202 --- /dev/null +++ b/trunk/bin/cli.ml @@ -0,0 +1,23 @@ +module Test = struct + type t = unit + let blank_line () = print_string "{bl}" + let angled_uri s () = print_string ("{>}" ^ s ^ "{<}") + let plain_text s () = print_string s + let heading_hashbang i s () = print_string (string_of_int i ^ s) + let paragraph_s () = print_string "{p>}" + let paragraph_e () = print_string "{ " ^ u +let bold t a = a ^ "*" ^ t ^ "*" +let italic t a = a ^ "/" ^ t ^ "/" +let underline t a = a ^ "_" ^ t ^ "_" +let inline_monospace t a = a ^ "`" ^ t ^ "`" +let heading_hashbang lvl h a = a ^ String.make lvl '#' ^ h ^ "\n" +let paragraph_s a = a +let paragraph_e a = a +let preformatted s a = a ^ "
    " ^ s ^ "
    " +let bullet_list_s a = a +let bullet_list_e a = a +let bullet_item_s ch a = a ^ Char.escaped ch +let bullet_item_e a = a ^ "\n" +let ordered_list_s a = a +let ordered_list_e a = a +let ordered_item_s = bullet_item_s +let ordered_item_e = bullet_item_e +let key_value_pair k v a = prerr_endline @@ k ^ "~" ^ v; a + diff --git a/trunk/converters/html.ml b/trunk/converters/html.ml new file mode 100644 index 0000000..4a67e86 --- /dev/null +++ b/trunk/converters/html.ml @@ -0,0 +1,39 @@ +let esc x = + let fn a c = match c with + | '&' -> a ^ "&" + | '<' -> a ^ "<" + | '"' -> a ^ """ + | '\''-> a ^ "'" + | x -> a ^ String.make 1 x + in + Seq.fold_left fn "" (String.to_seq x) + +type t = string +let blank_line a = a ^ "" +let plain_text s a = a ^ esc s +let sentence_s a = a ^ "" +let sentence_e a = a ^ " " +let sentence_segment s a = a ^ esc s ^ " " +let reference_name n a = a ^ {||} ^ esc n ^ "" +let bracketed_referent_s n a = a ^ {||} ^ esc n ^ ": " +let bracketed_referent_e a = a ^ "
    " +let angled_uri u a = a ^ {|<|} ^ esc u ^ {|>|} +let bold t a = a ^ "" ^ esc t ^ "" +let italic t a = a ^ "" ^ esc t ^ "" +let underline t a = a ^ "" ^ esc t ^ "" +let inline_monospace t a = a ^ "" ^ esc t ^ "" +let heading_hashbang lvl h a = + let lvl = string_of_int lvl in + a ^ "" ^ esc h ^ "" +let paragraph_s a = a ^ "

    " +let paragraph_e a = a ^ "

    " +let preformatted s a = a ^ "
    " ^ esc s ^ "
    " +let bullet_list_s a = a ^ "
      " +let bullet_list_e a = a ^ "
    " +let bullet_item_s _ch a = a ^ "
  • " +let bullet_item_e a = a ^ "
  • " +let ordered_list_s a = a ^ "
      " +let ordered_list_e a = a ^ "
    " +let ordered_item_s = bullet_item_s +let ordered_item_e = bullet_item_e +let key_value k v a = prerr_endline @@ k ^ "~" ^ v; a diff --git a/trunk/cursor.ml b/trunk/cursor.ml new file mode 100644 index 0000000..aaa8b5d --- /dev/null +++ b/trunk/cursor.ml @@ -0,0 +1,22 @@ +type t = { text : string; pos : int; right_boundary : int } + +let overran cursor = cursor.pos >= cursor.right_boundary +let next_char cursor = { cursor with pos = cursor.pos + 1 } +let char_at cur offset = String.get cur.text (cur.pos + offset) +let char cur = String.get cur.text cur.pos +let distance a b = b.pos - a.pos + +let sub ?left ?right cur = { cur with + pos = Option.value left ~default:cur.pos; + right_boundary = Option.value right ~default:cur.right_boundary } + +let unwrap num cur = sub ~left:(cur.pos+num) ~right:(cur.right_boundary-num) cur + +let segment_string cur = String.sub cur.text cur.pos (cur.right_boundary - cur.pos) + +(*todo: reconsider +1 result and type cursor*) +let rec find_end e = function + | cur when cur.pos + 1 = String.length cur.text -> Some cur.pos + | cur when overran cur -> None + | cur when e cur (char cur) -> Some (cur.pos + 1) + | cur -> find_end e (next_char cur) diff --git a/trunk/dune b/trunk/dune new file mode 100644 index 0000000..889e5f1 --- /dev/null +++ b/trunk/dune @@ -0,0 +1,4 @@ +(library + (name text_parse) + (public_name text_parse) + (modules parser syntax cursor)) diff --git a/trunk/dune-project b/trunk/dune-project new file mode 100644 index 0000000..8138c5d --- /dev/null +++ b/trunk/dune-project @@ -0,0 +1,12 @@ +(lang dune 2.4) +(name text_parse) +(version 1.02) + +(license EUPL-1.2) +(maintainers "orbifx ") + +(generate_opam_files true) + +(package + (name text_parse) + (synopsis "Applicative text parsing library for OCaml")) diff --git a/trunk/parser.ml b/trunk/parser.ml new file mode 100644 index 0000000..2124048 --- /dev/null +++ b/trunk/parser.ml @@ -0,0 +1,37 @@ +module type S = sig + include Syntax.S + type t + val parse: Cursor.t -> t -> t +end + +module type Sub_parsers = sig + type t + val subparsers: (module S with type t = t) array +end + +let at s e cur ch = if s cur ch then Cursor.find_end e cur else None + +let apply_default (type a) (module P: S with type t = a) (acc: a) cursor_default cursor = + if cursor_default = cursor then acc + else P.parse (Cursor.sub ~right:(cursor.Cursor.pos) cursor_default) acc + +let rec branch: type a. ?idx:int -> a -> Cursor.t -> Cursor.t -> (module S with type t = a) array -> (a * Cursor.t) = +fun ?idx:(i=1) acc cursor_default cursor syntaxes -> + if Cursor.overran cursor then (apply_default syntaxes.(0) acc cursor_default cursor), cursor + else + try let (module P: S with type t = a) = syntaxes.(i) in + (match at P.s P.e cursor (Cursor.char cursor) with + | Some right -> + let acc = apply_default syntaxes.(0) acc cursor_default cursor in + let acc = P.parse (Cursor.sub ~right cursor) acc in + let cursor = Cursor.sub ~left:right cursor in + branch acc cursor cursor syntaxes + | None | exception Invalid_argument _ -> branch ~idx:(i+1) acc cursor_default cursor syntaxes) + with Invalid_argument _ -> + branch acc cursor_default (Cursor.next_char cursor) syntaxes + +let rec parse subsyntaxes cursor acc = + if Cursor.overran cursor then acc + else + let acc, cursor = branch acc cursor cursor subsyntaxes in + parse subsyntaxes cursor acc diff --git a/trunk/parsers/blank_line.ml b/trunk/parsers/blank_line.ml new file mode 100644 index 0000000..6d02f3e --- /dev/null +++ b/trunk/parsers/blank_line.ml @@ -0,0 +1,11 @@ +module type Fn = sig + type t + val blank_line: t -> t +end + +module Make (F : Fn) = struct + type t = F.t + let s _cur = function '\n' -> true | _ -> false + let e _cur = function '\n' -> true | _ -> false + let parse _cursor acc = F.blank_line acc +end diff --git a/trunk/parsers/bullet.ml b/trunk/parsers/bullet.ml new file mode 100644 index 0000000..b6c6f1c --- /dev/null +++ b/trunk/parsers/bullet.ml @@ -0,0 +1,31 @@ +module type Fn = sig + type t + val bullet_item_s: char -> 'a -> 'a + val bullet_item_e: 'a -> 'a + val bullet_list_s: 'a -> 'a + val bullet_list_e: 'a -> 'a +end + +open Text_parse.Parser +open Text_parse.Syntax +open Text_parse.Cursor + +module Item (F : Fn) = struct + type t = F.t + let s _cursor = function '-' | '+' | '*' -> true | _ -> false + let e cursor _ch = newline (char_at cursor 1) + let subsyntaxes = [||] + let parse cur acc = + let bullet_char = char cur in + let left = match find_end (fun _cur c -> c <> ' ') { cur with pos = cur.pos + 1 } + with Some x -> x-1 | None -> 0 in + F.bullet_item_s bullet_char acc |> parse subsyntaxes (sub ~left cur) |> F.bullet_item_e +end + +module List (F : Fn) = struct + type t = F.t + let s _cursor = function '-' | '+' | '*' -> true | _ -> false + let e cursor _ch = newline (char_at cursor 1) && not (s cursor (char_at cursor 2)) + let subsyntaxes = [| (module Item (F) : Text_parse.Parser.S with type t = F.t) |] + let parse cur acc = F.bullet_list_s acc |> parse subsyntaxes cur |> F.bullet_list_e +end diff --git a/trunk/parsers/dune b/trunk/parsers/dune new file mode 100644 index 0000000..ae648ff --- /dev/null +++ b/trunk/parsers/dune @@ -0,0 +1,4 @@ +(library + (name parsers) + (public_name text_parse.parsers) + (libraries text_parse)) diff --git a/trunk/parsers/emphasis.ml b/trunk/parsers/emphasis.ml new file mode 100644 index 0000000..1486c5e --- /dev/null +++ b/trunk/parsers/emphasis.ml @@ -0,0 +1,32 @@ +module type Fn = sig + val bold: string -> 'a -> 'a + val italic: string -> 'a -> 'a + val underline: string -> 'a -> 'a + val inline_monospace: string -> 'a -> 'a +end + +open Text_parse.Cursor + +module Bold (F : Fn) = struct + let s _cursor = function '*' -> true | _ -> false + let e = s + let parse cur acc = F.bold (segment_string (unwrap 1 cur)) acc +end + +module Italic (F : Fn) = struct + let s _cursor = function '/' -> true | _ -> false + let e = s + let parse cur acc = F.italic (segment_string (unwrap 1 cur)) acc +end + +module Underline (F : Fn) = struct + let s _cursor = function '_' -> true | _ -> false + let e = s + let parse cur acc = F.underline (segment_string (unwrap 1 cur)) acc +end + +module Inline_monospace (F : Fn) = struct + let s _cursor = function '`' -> true | _ -> false + let e = s + let parse cur acc = F.inline_monospace (segment_string (unwrap 1 cur)) acc +end diff --git a/trunk/parsers/heading.ml b/trunk/parsers/heading.ml new file mode 100644 index 0000000..8827d7b --- /dev/null +++ b/trunk/parsers/heading.ml @@ -0,0 +1,17 @@ +module type Fn = sig + type t + val heading_hashbang: int -> string -> t -> t +end + +open Text_parse.Syntax +open Text_parse.Cursor + +module Hashbang (F : Fn) = struct + type t = F.t + let s _cur = function '#' -> true | _ -> false + let e _cur = newline + let parse cursor acc = + let level = match find_end (fun _cur c -> c <> '#') cursor with + Some x -> x - cursor.pos - 1 | None -> 0 in + F.heading_hashbang level (segment_string { cursor with pos = cursor.pos + level + 1; right_boundary = cursor.right_boundary-1 }) acc +end diff --git a/trunk/parsers/key_value.ml b/trunk/parsers/key_value.ml new file mode 100644 index 0000000..98d9a87 --- /dev/null +++ b/trunk/parsers/key_value.ml @@ -0,0 +1,19 @@ +module type Fn = sig + type t + val key_value: string -> string -> t -> t +end + +open Text_parse.Syntax +open Text_parse.Cursor + +module Make (F : Fn) = struct + type t = F.t + let s _cur c = letter c + let e _cur c = newline c + let parse cursor acc = + let colon_pos = match find_end (fun _cur c -> c = ':') cursor with + Some x -> x - cursor.pos - 1 | None -> 0 in (*todo:None shouldn't be allowed by scope*) + let key = segment_string { cursor with right_boundary = cursor.pos+colon_pos } in + let value = segment_string { cursor with pos = cursor.pos+colon_pos+1; right_boundary = cursor.right_boundary } in + F.key_value key value acc +end diff --git a/trunk/parsers/markdown.ml b/trunk/parsers/markdown.ml new file mode 100644 index 0000000..4af084b --- /dev/null +++ b/trunk/parsers/markdown.ml @@ -0,0 +1,27 @@ +module type Markdown_t = sig + include Blank_line.Fn + include Reference.Fn with type t := t + include Bullet.Fn with type t := t + include Ordered.Fn with type t := t + include Heading.Fn with type t := t + include Preformatted.Fn with type t := t + include Paragraph.Fn with type t := t +end + +open Text_parse.Parser +open Text_parse.Cursor + +module Make (F : Markdown_t) = struct + let subsyntaxes = [| + (module Blank_line.Make (F) : Text_parse.Parser.S with type t = F.t); + (module Heading.Hashbang (F)); + (module Reference.Referent (F)); + (module Bullet.List (F)); + (module Ordered.List (F)); + (module Preformatted.Tabbed (F)); + (*(module Paragraph.Make (F));*) + |] + + let of_string text acc = + parse subsyntaxes { text; pos = 0; right_boundary = String.length text - 1 } acc +end diff --git a/trunk/parsers/ordered.ml b/trunk/parsers/ordered.ml new file mode 100644 index 0000000..7d2c32e --- /dev/null +++ b/trunk/parsers/ordered.ml @@ -0,0 +1,37 @@ +module type Fn = sig + type t + val ordered_item_s: char -> 'a -> 'a + val ordered_item_e: 'a -> 'a + val ordered_list_s: 'a -> 'a + val ordered_list_e: 'a -> 'a +end + +open Text_parse.Parser +open Text_parse.Syntax +open Text_parse.Cursor + +module Item (F : Fn) = struct + type t = F.t + let s cur ch = + let is_enum c = (c >= '0' && c <= '9') || (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') in + let is_delim c = c = '.' || c = ')' in + is_enum ch && is_delim (char_at cur 1) + let e cursor _ch = newline (char_at cursor 1) + let subsyntaxes = [||] + let parse cur acc = + let bullet_char = char cur in + let left = match find_end (fun _cur c -> c <> ' ') { cur with pos = cur.pos + 1 } + with Some x -> x | None -> 0 in + F.ordered_item_s bullet_char acc |> parse subsyntaxes (sub ~left cur) |> F.ordered_item_e +end + +module List (F: Fn) = struct + type t = F.t + let s cur ch = + let is_enum c = (c >= '0' && c <= '9') || (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') in + let is_delim c = c = '.' || c = ')' in (*todo: can't have `.` if sentence ends with it and lists are in sense*) + is_enum ch && is_delim (char_at cur 1) + let e cursor _ch = newline (char_at cursor 1) && newline (char_at cursor 2)(* not (s {cursor with pos = cursor.pos+2} (char_at cursor 2)) *) + let subsyntaxes = [| (module Item (F) : Text_parse.Parser.S with type t = F.t) |] + let parse cur acc = F.ordered_list_s acc |> parse subsyntaxes cur |> F.ordered_list_e +end diff --git a/trunk/parsers/paragraph.ml b/trunk/parsers/paragraph.ml new file mode 100644 index 0000000..216ee5e --- /dev/null +++ b/trunk/parsers/paragraph.ml @@ -0,0 +1,19 @@ +module type Fn = sig + type t + val paragraph_s: t -> t + val paragraph_e: t -> t +end + +open Text_parse.Parser +open Text_parse.Syntax +open Text_parse.Cursor + +module Make (F : Fn)(S : Text_parse.Parser.Sub_parsers with type t = F.t) = struct + type t = F.t + let s _cur ch = printable ch + let e cur = function + | '\n' -> char_at cur (-1) = '\n' + | _ when cur.pos + 1 = cur.right_boundary -> true + | _ -> false + let parse cur acc = F.paragraph_s acc |> parse S.subparsers cur |> F.paragraph_e +end diff --git a/trunk/parsers/plain_text.ml b/trunk/parsers/plain_text.ml new file mode 100644 index 0000000..59496ea --- /dev/null +++ b/trunk/parsers/plain_text.ml @@ -0,0 +1,36 @@ +module type Fn = sig + type t + val plain_text: string -> t -> t +end + +open Text_parse.Parser +open Text_parse.Cursor + +module Plain_text (F : Fn) = struct + type t = F.t + let s _cursor _ch = true + let e cursor = function + | '\n' -> char_at cursor (-1) = '\n' + | _ when cursor.pos + 1 = cursor.right_boundary -> true + | _ -> false + let parse cur acc = F.plain_text (segment_string cur) acc +end + +module type Plain_text_t = sig + include Blank_line.Fn + include Heading.Fn with type t := t + include Uri.Fn with type t := t + include Paragraph.Fn with type t := t + include Fn with type t := t +end + + +module Make (F : Plain_text_t) = struct + module P = struct + type t = F.t + let subparsers = [| (module Plain_text (F) : Text_parse.Parser.S with type t = F.t); (module Uri.Angled (F)) |] + end + + let subparsers = [| (module Paragraph.Make (F)(P) : Text_parse.Parser.S with type t = F.t); (module Blank_line.Make (F)); (module Heading.Hashbang (F)); (module Paragraph.Make (F)(P)); |] + let of_string text acc = parse subparsers { text; pos = 0; right_boundary = String.length text - 1 } acc +end diff --git a/trunk/parsers/preformatted.ml b/trunk/parsers/preformatted.ml new file mode 100644 index 0000000..80bb795 --- /dev/null +++ b/trunk/parsers/preformatted.ml @@ -0,0 +1,13 @@ +module type Fn = sig + type t + val tab_preformatted: string -> t -> t +end + +open Text_parse.Cursor + +module Tabbed (F : Fn) = struct + type t = F.t + let s _cur ch = '\t' = ch + let e cur = function '\n' -> not (char_at cur 1 = '\t') | _ -> false + let parse cur acc = F.tab_preformatted (segment_string cur) acc +end diff --git a/trunk/parsers/reference.ml b/trunk/parsers/reference.ml new file mode 100644 index 0000000..cf45ced --- /dev/null +++ b/trunk/parsers/reference.ml @@ -0,0 +1,31 @@ +module type Fn = sig + type t + val reference_name: string -> string -> t -> t + val referent_s: string -> t -> t + val referent_e: t -> t +end + +open Text_parse.Parser +open Text_parse.Cursor +open Text_parse.Syntax + +module Name (F : Fn) = struct + type t = F.t + let s _cursor = function '[' -> true | _ -> false + let e _cursor = function ']' -> true | _ -> false + let parse cur acc = F.reference_name (segment_string (unwrap 1 cur)) acc +end + +module Referent (F : Fn) = struct + type t = F.t + let find_name_end = find_end (fun cur c -> c = ']' && (char_at cur 1) = ':') + let s cur = function '[' -> Option.is_some (find_name_end cur) | _ -> false + let e _cur = newline + let subsyntaxes = [| |] + let parse cur acc = + let name_boundary = match find_name_end cur with Some x -> x | None -> 0 in + let name = segment_string { cur with pos = cur.pos+1; right_boundary = name_boundary-1 } in + let text_cur = { cur with pos = name_boundary+2 } in + F.referent_s name acc |> parse subsyntaxes text_cur |> F.referent_e +end + diff --git a/trunk/parsers/sentence.ml b/trunk/parsers/sentence.ml new file mode 100644 index 0000000..cb006ac --- /dev/null +++ b/trunk/parsers/sentence.ml @@ -0,0 +1,37 @@ +module type Fn = sig + val sentence_segment: string -> 'a -> 'a + val sentence_s: 'a -> 'a + val sentence_e: 'a -> 'a +end + +open Text_parse.Parser +open Text_parse.Syntax +open Text_parse.Cursor + +module Sentence_segment (F : Fn) = struct + let s _cursor = printable + let e cursor = function + | '.' -> char_at cursor 1 = ' ' || newline (char_at cursor 1) (* todo punctuations *) + | '\n' -> char_at cursor 1 = '\n' + | _ when cursor.pos + 1 = cursor.right_boundary -> true + | _ when char_at cursor 1 = '[' -> true + | _ when char_at cursor 1 = '*' -> true + | _ when char_at cursor 1 = '_' -> true + | _ when char_at cursor 1 = '/' -> true + | _ when char_at cursor 1 = '`' -> true + | _ when char_at cursor 1 = '<' -> true + | _ -> false + let at = at s e + let parse cur acc = F.sentence_segment (segment_string cur) acc +end + +module Sentence (F : Fn) = struct + let s _cursor = printable + let e cursor = function + | '.' -> char_at cursor 1 = ' ' (* todo punctuations *) + | '\n' -> char_at cursor 1 = '\n' + | _ -> false + let at = at s e + let subsyntaxes = [| |] + let parse cur acc = F.sentence_s acc |> parse subsyntaxes cur |> F.sentence_e +end diff --git a/trunk/parsers/uri.ml b/trunk/parsers/uri.ml new file mode 100644 index 0000000..5ed6cc0 --- /dev/null +++ b/trunk/parsers/uri.ml @@ -0,0 +1,27 @@ +module type Fn = sig + type t + val angled_uri: string -> t -> t +end + +open Text_parse.Syntax +open Text_parse.Cursor + +module Angled (F : Fn) = struct + type t = F.t + let s cur = function '<' -> let c = char_at cur 1 in letter c || digit c | _ -> false + let e _cur = function '>' -> true | _ -> false + let parse cur acc = F.angled_uri (segment_string (unwrap 1 cur)) acc +end + +(* module Uri (F : TextFn) = struct + * type t = F.t + * let rec is_scheme cur = function + * | ':' -> true + * | ch when letter ch -> is_scheme (next_char cur) (char_at cur 1) + * | _ -> false + * let s cur ch = letter ch && is_scheme (next_char cur) (char_at cur 1) + * let e cur _ch = match char_at cur 1 with '\n' | ' ' -> true | _ -> false + * let at = at s e + * let parse cur acc = F.angled_uri (segment_string cur) acc + * end *) + diff --git a/trunk/syntax.ml b/trunk/syntax.ml new file mode 100644 index 0000000..cf059a4 --- /dev/null +++ b/trunk/syntax.ml @@ -0,0 +1,11 @@ +module type S = sig + val s: Cursor.t -> char -> bool + val e: Cursor.t -> char -> bool +end + +(*let str c = String.make 1 c*) + +let newline = function '\n' -> true | _ -> false +let printable ch = ch >= ' ' && ch <= '~' +let letter ch = (ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z') +let digit = function '0' .. '9' -> true | _ -> false diff --git a/trunk/text_parse.opam b/trunk/text_parse.opam new file mode 100644 index 0000000..382c6e2 --- /dev/null +++ b/trunk/text_parse.opam @@ -0,0 +1,23 @@ +# This file is generated by dune, edit dune-project instead +opam-version: "2.0" +version: "1.02" +synopsis: "Applicative text parsing library for OCaml" +maintainer: ["orbifx "] +license: "EUPL-1.2" +depends: [ + "dune" {>= "2.4"} +] +build: [ + ["dune" "subst"] {pinned} + [ + "dune" + "build" + "-p" + name + "-j" + jobs + "@install" + "@runtest" {with-test} + "@doc" {with-doc} + ] +]