;;; tomelr.el --- Convert S-expressions to TOML -*- lexical-binding: t -*- ;; Copyright (C) 2022 Free Software Foundation, Inc. ;; Author: Kaushal Modi ;; Version: 0.4.3 ;; Package-Requires: ((emacs "26.3") (map "3.2.1") (seq "2.23")) ;; Keywords: data, tools, toml, serialization, config ;; URL: https://github.com/kaushalmodi/tomelr/ ;; This file is not part of GNU Emacs. ;; This program is free software; you can redistribute it and/or modify ;; it under the terms of the GNU General Public License as published by ;; the Free Software Foundation, either version 3 of the License, or ;; (at your option) any later version. ;; This program is distributed in the hope that it will be useful, ;; but WITHOUT ANY WARRANTY; without even the implied warranty of ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ;; GNU General Public License for more details. ;; You should have received a copy of the GNU General Public License ;; along with this program. If not, see . ;;; Commentary: ;; tomelr.el is a library for converting Lisp data expressions or ;; S-expressions to TOML format (https://toml.io/en/). ;; It has one entry point `tomelr-encode' which accepts a Lisp data ;; expression, usually in an alist or plist form, and return a string ;; representing the TOML serializaitno format. ;; Example using an alist as input: ;; ;; (tomelr-encode '((title . "My title") ;; (author . "Me") ;; (params . ((foo . 123))))) ;; ;; Output: ;; ;; title = "My title" ;; author = "Me" ;; [params] ;; foo = 123 ;; Example using an plist as input: ;; ;; (tomelr-encode '(:title "My title" ;; :author "Me" ;; :params (:foo 123))) ;; ;; Above snippet will give as the same TOML output shown above. ;; See the README.org on https://github.com/kaushalmodi/tomelr/ for ;; more examples and package details. ;;; Code: (require 'json) (require 'map) (require 'subr-x) ;For `string-trim' on Emacs versions 27.2 and older ;;; Variables (defvar tomelr-false '(:false 'false) "S-exp values to be interpreted as TOML `false'.") (defvar tomelr-encoding-default-indentation " " "String used for a single indentation level during encoding. This value is repeated for each further nested element.") (defvar tomelr-coerce-to-types '(boolean integer) "List of TOML types to which the TOML strings will be attempted to be coerced. Valid symbols that can be present in this list: boolean, integer, float For example, if this list contains `boolean' and if a string value is exactly \"true\", it will coerce to TOML boolean `true'.") (defvar tomelr-indent-multi-line-strings nil "Indent the multi-line TOML strings when non-nil. This option injects spaces after each newline to present the multi-line strings in a more readable format. *Note: This option should be set to non-nil only if the TOML string data is insensitive to horizontal space. Good examples of this would be Org, Markdown or HTML strings.") ;;;; Internal Variables (defvar tomelr--print-indentation-prefix "\n" "String used to start indentation during encoding.") (defvar tomelr--print-indentation-depth -1 "Current indentation level during encoding. Dictates repetitions of `tomelr-encoding-default-indentation'.") (defvar tomelr--print-table-hierarchy () "Internal variable used to save TOML Table hierarchies. This variable is used for both TOML Tables and Arrays of TOML Tables.") (defvar tomelr--print-keyval-separator " = " "String used to separate key-value pairs during encoding.") (defvar tomelr--date-time-regexp (concat "\\`[[:digit:]]\\{4\\}-[[:digit:]]\\{2\\}-[[:digit:]]\\{2\\}" "\\(?:[T ][[:digit:]]\\{2\\}:[[:digit:]]\\{2\\}:[[:digit:]]\\{2\\}\\(?:\\.[[:digit:]]+\\)*" "\\(?:Z\\|[+-][[:digit:]]\\{2\\}:[[:digit:]]\\{2\\}\\)*\\)*\\'") "Regexp to match RFC 3339 formatted date-time with offset. - https://toml.io/en/v1.0.0#offset-date-time - https://tools.ietf.org/html/rfc3339#section-5.8 Examples: 1979-05-27 1979-05-27T07:32:00Z 1979-05-27 07:32:00Z 1979-05-27T00:32:00-07:00 1979-05-27T00:32:00.999999+04:00.") ;;; Error conditions (define-error 'tomelr-error "Unknown TOML error") (define-error 'tomelr-key-format "Bad TOML object key" 'tomelr-error) ;;; Utilities (defmacro tomelr--with-output-to-string (&rest body) "Eval BODY in a temporary buffer bound to `standard-output'. Return the resulting buffer contents as a string." (declare (indent 0) (debug t)) `(with-output-to-string (with-current-buffer standard-output ;; This affords decent performance gains. (setq-local inhibit-modification-hooks t) ,@body))) (defmacro tomelr--with-indentation (&rest body) "Eval BODY with the TOML encoding nesting incremented by one step. This macro sets up appropriate variable bindings for `tomelr--print-indentation' to produce the correct indentation." (declare (debug t) (indent 0)) `(let ((tomelr--print-indentation-depth (1+ tomelr--print-indentation-depth))) ,@body)) (defun tomelr--print-indentation () "Insert the current indentation for TOML encoding at point." (insert tomelr--print-indentation-prefix) (dotimes (_ tomelr--print-indentation-depth) (insert tomelr-encoding-default-indentation))) ;;; Encoding ;;;; Booleans (defun tomelr--print-boolean (object) "Insert TOML boolean true or false at point if OBJECT is a boolean. Return nil if OBJECT is not recognized as a TOML boolean." (prog1 (setq object (cond ((or (eq object t) (and (member 'boolean tomelr-coerce-to-types) (member object '("true" true)))) "true") ((or (member object tomelr-false) (and (member 'boolean tomelr-coerce-to-types) (member object '("false" false)))) "false"))) (and object (insert object)))) ;;;; Strings (defun tomelr--print-string (string) "Insert a TOML representation of STRING at point. Return the same STRING passed as input." ;; (message "[tomelr--print-string DBG] string = `%s'" string) (let ((special-chars '((?b . ?\b) ;U+0008 (?f . ?\f) ;U+000C (?\\ . ?\\))) (special-chars-re (rx (in ?\" ?\\ cntrl ?\u007F))) ;cntrl is same as (?\u0000 . ?\u001F) ;; Use multi-line string quotation if the string contains a " ;; char or a newline - """STRING""". (multi-line (string-match-p "\n\\|\"" string)) begin-q end-q) (cond (multi-line ;; From https://toml.io/en/v1.0.0#string, Any Unicode ;; character may be used except those that must be escaped: ;; backslash and the control characters other than tab, line ;; feed, and carriage return (U+0000 to U+0008, U+000B, ;; U+000C, U+000E to U+001F, U+007F). (setq special-chars-re (rx (in ?\\ (?\u0000 . ?\u0008) ?\u000B ?\u000C (?\u000E . ?\u001F) ?\u007F))) (setq begin-q "\"\"\"\n") (setq end-q "\"\"\"") (when tomelr-indent-multi-line-strings (let (;; Fix the indentation of multi-line strings to 2 ;; spaces. If the indentation is increased to 4 or more ;; spaces, those strings will get parsed as code blocks ;; by Markdown parsers. (indentation " ")) (setq string (concat indentation ;Indent the first line in the multi-line string (replace-regexp-in-string "\\(\n\\)\\([^\n]\\)" ;Don't indent blank lines (format "\\1%s\\2" indentation) string) "\n" indentation ;Indent the closing """ at the end of the multi-line string ))))) (t ;Basic quotation "STRING" (push '(?\" . ?\") special-chars) (push '(?t . ?\t) special-chars) ;U+0009 (push '(?n . ?\n) special-chars) ;U+000A (push '(?r . ?\r) special-chars) ;U+000D (setq begin-q "\"") (setq end-q begin-q))) (and begin-q (insert begin-q)) (goto-char (prog1 (point) (princ string))) (while (re-search-forward special-chars-re nil :noerror) (let ((char (preceding-char))) (delete-char -1) (insert ?\\ (or ;; Escape special characters (car (rassq char special-chars)) ;; Fallback: UCS code point in \uNNNN form. (format "u%04x" char))))) (and end-q (insert end-q)) string)) (defun tomelr--print-stringlike (object &optional key-type) "Insert OBJECT encoded as a TOML string at point. Possible values of KEY-TYPE are `normal-key', `table-key', `table-array-key', or nil. Return nil if OBJECT cannot be encoded as a TOML string." ;; (message "[tomelr--print-stringlike DBG] object = %S (type = %S) key type = %S" ;; object (type-of object) key-type) (let ((str (cond ;; Object is a normal, TT or TTA key (key-type (cond ((stringp object) (if (string-match-p "\\`[A-Za-z0-9_-]+\\'" object) ;; https://toml.io/en/v1.0.0#keys ;; Bare keys may only contain ASCII letters, ASCII digits, ;; underscores, and dashes (A-Za-z0-9_-). object ;; Wrap string in double-quotes if it ;; doesn't contain only A-Za-z0-9_- chars. (format "\"%s\"" object))) ;; Plist keys as in (:foo 123) ((keywordp object) (string-trim-left (symbol-name object) ":")) ;; Alist keys as in ((foo . 123)) ((symbolp object) (symbol-name object)) (t (user-error "[tomelr--print-stringlike] Unhandled case of key-type")))) ;; Cases where object is a key value. ((symbolp object) (symbol-name object)) ((stringp object) object)))) ;; (message "[tomelr--print-stringlike DBG] str = %S" str) (when (member key-type '(table-key table-array-key)) ;; (message "[tomelr--print-stringlike DBG] %S is symbol, type = %S, depth = %d" ;; object key-type tomelr--print-indentation-depth) (if (null (nth tomelr--print-indentation-depth tomelr--print-table-hierarchy)) (setq tomelr--print-table-hierarchy (append tomelr--print-table-hierarchy (list str))) ;; Throw away table keys collected at higher depths, if ;; any, from earlier runs of this function. (setq tomelr--print-table-hierarchy (seq-take tomelr--print-table-hierarchy (1+ tomelr--print-indentation-depth))) (setf (nth tomelr--print-indentation-depth tomelr--print-table-hierarchy) str)) ;; (message "[tomelr--print-stringlike DBG] table hier: %S" tomelr--print-table-hierarchy) ) (cond ;; TT keys ((equal key-type 'table-key) (princ (format "[%s]" (string-join tomelr--print-table-hierarchy ".")))) ;; TTA keys ((equal key-type 'table-array-key) (princ (format "[[%s]]" (string-join tomelr--print-table-hierarchy ".")))) ;; Normal keys (Alist and Plist keys) ((equal key-type 'normal-key) (princ str)) (str (cond ((or ;; RFC 3339 Date/Time (string-match-p tomelr--date-time-regexp str) ;; Coercing ;; Integer that can be stored in the system as a fixnum. ;; For example, if `object' is "10040216507682529280" that ;; needs more than 64 bits to be stored as a signed ;; integer, it will be automatically stored as a float. ;; So (integerp (string-to-number object)) will return nil ;; [or `fixnump' instead of `integerp' in Emacs 27 or ;; newer]. ;; https://github.com/toml-lang/toml#integer ;; Integer examples: 7, +7, -7, 7_000 (and (or (symbolp object) (member 'integer tomelr-coerce-to-types)) (string-match-p "\\`[+-]?[[:digit:]_]+\\'" str) (if (functionp #'fixnump) ;`fixnump' and `bignump' get introduced in Emacs 27.x (fixnump (string-to-number str)) ;; On older Emacsen, `integerp' behaved the same as the ;; new `fixnump'. (integerp (string-to-number str))))) (princ str)) (t (tomelr--print-string str)))) (t nil)))) (defun tomelr--print-key (key &optional key-type) "Insert a TOML key representation of KEY at point. KEY-TYPE represents the type of key: `normal-key', `table-key' or `table-array-key'. Signal `tomelr-key-format' if it cannot be encoded as a string." (or (tomelr--print-stringlike key key-type) (signal 'tomelr-key-format (list key)))) ;;;; Objects ;; `tomelr-alist-p' is a slightly modified version of `json-alist-p'. ;; It fixes this scenario: (json-alist-p '((:a 1))) return t, which is wrong. ;; '((:a 1)) is an array of plist format maps, and not an alist. ;; (tomelr-alist-p '((:a 1))) returns nil as expected. (defun tomelr-alist-p (list) "Non-nil if and only if LIST is an alist with simple keys." (declare (pure t) (side-effect-free error-free)) (while (and (consp (car-safe list)) (not (json-plist-p (car-safe list))) (atom (caar list))) ;; (message "[tomelr-alist-p DBG] INSIDE list = %S, car = %S, caar = %S, atom of caar = %S" ;; list (car-safe list) (caar list) (atom (caar list))) (setq list (cdr list))) ;; (message "[tomelr-alist-p DBG] out 2 list = %S, is alist? %S" list (null list)) (null list)) (defun tomelr-toml-table-p (object) "Return non-nil if OBJECT can represent a TOML Table. Recognize both alist and plist format maps as TOML Tables. Examples: - Alist format: \\='((a . 1) (b . \"foo\")) - Plist format: \\='(:a 1 :b \"foo\")" (or (tomelr-alist-p object) (json-plist-p object))) (defun tomelr--print-pair (key val) "Insert TOML representation of KEY - VAL pair at point." (let ((key-type (cond ((tomelr-toml-table-p val) 'table-key) ((tomelr-toml-table-array-p val) 'table-array-key) (t 'normal-key)))) ;; (message "[tomelr--print-pair DBG] key = %S, val = %S, key-type = %S" ;; key val key-type) (when val ;Don't print the key if val is nil (tomelr--print-indentation) ;Newline before each key in a key-value pair (tomelr--print-key key key-type) ;; Skip putting the separator for table and table array keys. (unless (member key-type '(table-key table-array-key)) (insert tomelr--print-keyval-separator)) (tomelr--print val)))) (defun tomelr--print-map (map) "Insert a TOML representation of MAP at point. This works for any MAP satisfying `mapp'." ;; (message "[tomelr--print-map DBG] map = %S" map) (unless (map-empty-p map) (tomelr--with-indentation (map-do #'tomelr--print-pair map)))) ;;;; Lists (including alists and plists) (defun tomelr--print-list (list) "Insert a TOML representation of LIST at point." (cond ((tomelr-toml-table-p list) (tomelr--print-map list)) ((listp list) (tomelr--print-array list)) ((signal 'tomelr-error (list list))))) ;;;; Arrays (defun tomelr-toml-table-array-p (object) "Return non-nil if OBJECT can represent a TOML Table Array. Definition of a TOML Table Array (TTA): - OBJECT is TTA if it is of type ((TT1) (TT2) ..) where each element is a TOML Table (TT)." (when (or (listp object) (vectorp object)) (seq-every-p (lambda (elem) (tomelr-toml-table-p elem)) object))) (defun tomelr--print-tta-key () "Print TOML Table Array key." ;; (message "[tomelr--print-array DBG] depth = %d" tomelr--print-indentation-depth) ;; Throw away table keys collected at higher depths, if ;; any, from earlier runs of this function. (setq tomelr--print-table-hierarchy (seq-take tomelr--print-table-hierarchy (1+ tomelr--print-indentation-depth))) (tomelr--print-indentation) (insert (format "[[%s]]" (string-join tomelr--print-table-hierarchy ".")))) (defun tomelr--print-array (array) "Insert a TOML representation of ARRAY at point." ;; (message "[tomelr--print-array DBG] array = %S, TTA = %S" ;; array (tomelr-toml-table-array-p array)) (cond ((tomelr-toml-table-array-p array) (unless (= 0 (length array)) (let ((first t)) (mapc (lambda (elt) (if first (setq first nil) (tomelr--print-tta-key)) (tomelr--print elt)) array)))) (t (insert "[") (unless (= 0 (length array)) (tomelr--with-indentation (let ((first t)) (mapc (lambda (elt) (if first (setq first nil) (insert ", ")) (tomelr--print elt)) array)))) (insert "]")))) ;;;; Print wrapper (defun tomelr--print (object) "Insert a TOML representation of OBJECT at point. See `tomelr-encode' that returns the same as a string." (cond ((tomelr--print-boolean object)) ((listp object) (tomelr--print-list object)) ((tomelr--print-stringlike object)) ((numberp object) (prin1 object)) ((arrayp object) (tomelr--print-array object)) ((signal 'tomelr-error (list object))))) ;;; User API (defun tomelr-encode (object) "Return a TOML representation of OBJECT as a string. If an error is detected during encoding, an error based on `tomelr-error' is signaled." (setq tomelr--print-table-hierarchy ()) (string-trim (tomelr--with-output-to-string (tomelr--print object)))) (provide 'tomelr) ;;; tomelr.el ends here