298 lines
10 KiB
EmacsLisp
298 lines
10 KiB
EmacsLisp
;;; esxml.el --- Library for working with xml via esxml and sxml
|
||
;; Copyright (C) 2012
|
||
|
||
;; Author: Evan Izaksonas-Smith <izak0002 at umn dot edu>
|
||
;; Maintainer: Evan Izaksonas-Smith
|
||
;; URL: https://github.com/tali713/esxml
|
||
;; Created: 15th August 2012
|
||
;; Version: 0.3.7
|
||
;; Keywords: tools, lisp, comm
|
||
;; Description: A library for easily generating XML/XHTML in elisp
|
||
;;
|
||
;; This program is free software; you can redistribute it and/or modify
|
||
;; it under the terms of the GNU General Public License as published by
|
||
;; the Free Software Foundation, either version 3 of the License, or
|
||
;; (at your option) any later version.
|
||
|
||
;; This program is distributed in the hope that it will be useful,
|
||
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||
;; GNU General Public License for more details.
|
||
|
||
;; You should have received a copy of the GNU General Public License
|
||
;; along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||
|
||
;;; Commentary:
|
||
|
||
;; This is XML/XHTML done with S-Expressions in EmacsLisp. Simply,
|
||
;; this is the easiest way to write HTML or XML in Lisp.
|
||
;;
|
||
;; This library uses the native form of XML representation as used by
|
||
;; many libraries already included within emacs. This representation
|
||
;; will be referred to as "esxml" throughout this library. See
|
||
;; `esxml-to-xml' for a concise description of the format.
|
||
;;
|
||
;; This library is not intended to be used directly by a user, though
|
||
;; it certainly could be. It could be used to generate static html,
|
||
;; or use a library like `elnode' to serve dynamic pages. Or even to
|
||
;; extract a form from a site to produce an API.
|
||
;;
|
||
;; TODO: Better documentation, more convenience.
|
||
;;
|
||
;; NOTICE: Code base will be transitioning to using pcase instead of
|
||
;; destructuring bind wherever possible. If this leads to hard to
|
||
;; debug code, please let me know, and I will do whatever I can to
|
||
;; resolve these issues.
|
||
;;
|
||
;;; Code:
|
||
(require 'cl-lib)
|
||
(require 'xml)
|
||
(require 'pcase)
|
||
|
||
(defun string-trim-whitespace (string)
|
||
"A simple function, strips the whitespace from beginning and
|
||
end of the string. Leaves all other whitespace untouched."
|
||
(replace-regexp-in-string
|
||
(rx string-start (* whitespace)
|
||
(group (+? anything))
|
||
(* whitespace) string-end)
|
||
"\\1"
|
||
string))
|
||
|
||
(defun esxml-trim-ws (esxml)
|
||
"This may cause problems, is intended for parsing xml into sxml
|
||
but may eroneously delete desirable white space."
|
||
(if (stringp esxml) (string-trim-whitespace esxml)
|
||
(pcase-let ((`(,tag ,attrs . ,body) esxml))
|
||
`(,tag ,attrs
|
||
,@(mapcar 'esxml-trim-ws body)))))
|
||
|
||
(defun attrp (attr)
|
||
"Returns t if attr is a an esxml attribute.
|
||
An esxml attribute is a cons of the form (symbol . string)"
|
||
(and (consp attr)
|
||
(symbolp (car attr))
|
||
(stringp (cdr attr))))
|
||
|
||
(defun esxml--convert-pair (attr)
|
||
"Converts from cons cell to attribute pair. Not intended for
|
||
general use."
|
||
(pcase-let ((`(,car . ,cdr) attr))
|
||
(cl-check-type cdr string)
|
||
(concat (symbol-name car)
|
||
"="
|
||
(prin1-to-string (xml-escape-string cdr)))))
|
||
|
||
(defun attrsp (attrs)
|
||
"Returns t if attrs is a list of esxml attributes.
|
||
|
||
See: `attrp'"
|
||
(and (listp attrs)
|
||
(cl-every (lambda (attr)
|
||
(and (consp attr)
|
||
(symbolp (car attr))
|
||
(stringp (cdr attr))))
|
||
attrs)))
|
||
|
||
(defun esxml-validate-form (esxml)
|
||
"A fast esxml validator. Will error on invalid subparts making
|
||
it suitable for hindsight testing."
|
||
(cond ((stringp esxml) nil)
|
||
((< (length esxml) 2)
|
||
(error "%s is too short to be a valid esxml expression" esxml))
|
||
(t (pcase-let ((`(,tag ,attrs . ,body) esxml))
|
||
(cl-check-type tag symbol)
|
||
(cl-check-type attrs attrs)
|
||
(mapcar 'esxml-validate-form body)))))
|
||
|
||
;; While the following could certainly have been written using format,
|
||
;; concat makes them easier to read. Update later if neccesary for
|
||
;; efficiency.
|
||
|
||
;; Though at first glance the recursive nature of this function might
|
||
;; give one pause, since xml is a recursive data type, a recursive
|
||
;; parser is an optimal strategy. each node will be visited exactly
|
||
;; once during the transformation.
|
||
;;
|
||
;; Further, since a string is a terminal node and since xml can be
|
||
;; represented as a string, non dynamic portions of the page may be
|
||
;; precached quite easily.
|
||
(defun esxml--to-xml-recursive (esxml)
|
||
(pcase esxml
|
||
((pred stringp)
|
||
(xml-escape-string esxml))
|
||
(`(raw-string ,string)
|
||
(cl-check-type string stringp)
|
||
string)
|
||
(`(comment nil ,body)
|
||
(concat "<!-- " body " -->"))
|
||
(`(,tag ,attrs . ,body)
|
||
;; code goes here to catch invalid data.
|
||
(concat "<" (symbol-name tag)
|
||
(when attrs
|
||
(concat " " (mapconcat 'esxml--convert-pair attrs " ")))
|
||
(if body
|
||
(concat ">" (mapconcat 'esxml--to-xml-recursive body "")
|
||
"</" (symbol-name tag) ">")
|
||
"/>")))))
|
||
|
||
(defun esxml-to-xml (esxml)
|
||
"This translates an esxml expression, i.e. that which is returned
|
||
by xml-parse-region. The structure is defined as any of the
|
||
following forms:
|
||
|
||
- A string.
|
||
|
||
STRING
|
||
|
||
STRING: the string it is returned with entities escaped
|
||
|
||
- A list where the first element is the raw-string symbol and the
|
||
second is a string.
|
||
|
||
(raw-string STRING)
|
||
|
||
STRING: the string is returned unchanged. This allows for caching
|
||
of any constant parts, such as headers and footers.
|
||
|
||
- A list where the first element is the comment symbol and the
|
||
second is a string.
|
||
|
||
(comment STRING)
|
||
|
||
STRING: the string is embedded in a HTML comment.
|
||
|
||
- A list where the first element is the tag, the second is
|
||
an alist of attribute value pairs and the remainder of the list
|
||
is 0 or more esxml elements.
|
||
|
||
(TAG ATTRS &rest BODY)
|
||
|
||
TAG: is the tag and must be a symbol.
|
||
|
||
ATTRS: is an alist of attribute pairs each pair must be of the
|
||
form (KEY . VALUE).
|
||
|
||
KEY: is the name of the attribute and must be a symbol.
|
||
|
||
VALUE: is the value of the attribute and must be a string.
|
||
|
||
BODY: is zero or more esxml expressions. Having no body forms
|
||
implies that the tag should be self closed. If there is
|
||
one or more body forms the tag will always be explicitly
|
||
closed, even if they are the empty string."
|
||
(condition-case nil
|
||
(esxml--to-xml-recursive esxml)
|
||
(error (esxml-validate-form esxml))))
|
||
|
||
(defun pp-esxml-to-xml (esxml)
|
||
"This translates an esxml expresion as `esxml-to-xml' but
|
||
indents it for ease of human readability, it is necessarily
|
||
slower and will produce longer output."
|
||
(pcase esxml
|
||
((pred stringp)
|
||
(xml-escape-string esxml))
|
||
(`(raw-string ,string)
|
||
(cl-check-type string stringp)
|
||
string)
|
||
(`(comment nil ,body)
|
||
(concat "<!-- " body " -->"))
|
||
(`(,tag ,attrs . ,body)
|
||
(cl-check-type tag symbol)
|
||
(cl-check-type attrs attrs)
|
||
(concat "<" (symbol-name tag)
|
||
(when attrs
|
||
(concat " " (mapconcat 'esxml--convert-pair attrs " ")))
|
||
(if body
|
||
(concat ">" (if (cl-every 'stringp body)
|
||
(mapconcat 'identity body " ")
|
||
(concat "\n"
|
||
(replace-regexp-in-string
|
||
"^" " "
|
||
(mapconcat 'pp-esxml-to-xml body "\n"))
|
||
"\n"))
|
||
"</" (symbol-name tag) ">")
|
||
"/>")))
|
||
(_
|
||
(error "%s is not a valid esxml expression" esxml))))
|
||
|
||
(defun sxml-to-esxml (sxml)
|
||
"Translates sxml to esxml so the common standard can be used.
|
||
See: http://okmij.org/ftp/Scheme/SXML.html. Additionally,
|
||
(*RAW-STRING* \"string\") is translated to (raw-string
|
||
\"string\")."
|
||
(pcase sxml
|
||
(`(*RAW-STRING* ,body)
|
||
`(raw-string ,body))
|
||
(`(*COMMENT* ,body)
|
||
`(comment nil ,body))
|
||
(`(,tag (@ . ,attrs) . ,body)
|
||
`(,tag ,(mapcar (lambda (attr)
|
||
(cons (car attr)
|
||
(or (cadr attr)
|
||
(prin1-to-string (car attr)))))
|
||
attrs)
|
||
,@(mapcar 'sxml-to-esxml body)))
|
||
(`(,tag . ,body)
|
||
`(,tag nil
|
||
,@(mapcar 'sxml-to-esxml body)))
|
||
((and sxml (pred stringp)) sxml)))
|
||
|
||
(defun sxml-to-xml (sxml)
|
||
"Translates sxml to xml, via esxml, hey it's only a constant
|
||
factor. :)"
|
||
(esxml-to-xml (sxml-to-esxml sxml)))
|
||
|
||
|
||
|
||
;; TODO: make agnostic with respect to libxml vs xml.el
|
||
(defun xml-to-esxml (string &optional trim)
|
||
(with-temp-buffer
|
||
(insert string)
|
||
(let ((parse-tree (libxml-parse-xml-region (point-min)
|
||
(point-max))))
|
||
(if trim
|
||
(esxml-trim-ws parse-tree)
|
||
parse-tree))))
|
||
|
||
;; TODO, move to esxpath when mature
|
||
(defun esxml-get-by-key (esxml key value)
|
||
"Returns a list of all elements whose wttribute KEY match
|
||
VALUE. KEY should be a symbol, and VALUE should be a string.
|
||
Will not recurse below a match."
|
||
(unless (stringp esxml)
|
||
(pcase-let ((`(,tag ,attrs . ,body) esxml))
|
||
(if (equal value
|
||
(assoc-default key attrs))
|
||
(list esxml)
|
||
(apply 'append (mapcar (lambda (sexp)
|
||
(esxml-get-by-key sexp key value))
|
||
body))))))
|
||
|
||
(defun esxml-get-tags (esxml tags)
|
||
"Returns a list of all elements whose tag is a member of TAGS.
|
||
TAGS should be a list of tags to be matched against. Will not
|
||
recurse below a match."
|
||
(unless (stringp esxml)
|
||
(pcase-let ((`(,tag ,attrs . ,body) esxml))
|
||
(if (member tag tags)
|
||
(list esxml)
|
||
(apply 'append (mapcar (lambda (sexp)
|
||
(esxml-get-tags sexp tags))
|
||
body))))))
|
||
|
||
(defun esxml-get-forms (esxml)
|
||
"Returns a list of all forms."
|
||
(esxml-get-tags esxml '(form)))
|
||
|
||
;; taken from kv
|
||
(defmacro esxml-destructuring-mapcar (args sexp seq)
|
||
(declare (indent 2))
|
||
(let ((entry (make-symbol "entry")))
|
||
`(mapcar (lambda (,entry)
|
||
(cl-destructuring-bind ,args ,entry ,sexp))
|
||
,seq)))
|
||
|
||
(provide 'esxml)
|
||
;;; esxml.el ends here
|