emacs/org/elpa/esxml-20220506.759/esxml-query.el

784 lines
28 KiB
EmacsLisp
Raw Normal View History

2022-08-25 15:19:45 +00:00
;;; esxml-query.el --- select esxml nodes jQuery-style
;; Copyright (C) 2017 Vasilij Schneidermann <mail@vasilij.de>
;; Author: Vasilij Schneidermann <mail@vasilij.de>
;; Maintainer: Vasilij Schneidermann
;; Version: 0.1.1
;; Keywords: data, lisp
;; Package-Requires: ((cl-lib "0.1"))
;;
;; This program is free software; you can redistribute it and/or
;; modify it under the terms of the GNU General Public License as
;; published by the Free Software Foundation, either version 3 of the
;; License, or (at your option) any later version.
;; This program is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;; GNU General Public License for more details.
;; You should have received a copy of the GNU General Public License
;; along with this program. If not, see <http://www.gnu.org/licenses/>.
;;; Commentary:
;; Traditionally people pick one of the following options when faced
;; with the task of extracting data from XML in Emacs Lisp:
;;
;; - Using regular expressions on the unparsed document
;; - Manual tree traversal with `assoc', `car' and `cdr'
;;
;; Browsers faced a similar problem until jQuery happened, shortly
;; afterwards they started providing the `node.querySelector' and
;; `node.querySelectorAll' API for retrieving one or all nodes
;; matching a given CSS selector. This code implements the same API
;; with the `esxml-query' and `esxml-query-all' functions. The
;; following table summarizes the currently supported modifiers and
;; combinators:
;;
;; | Name | Supported? | Syntax |
;; |------------------------------------+------------+-------------|
;; | Namespaces | No | foo|bar |
;; | Commas | Yes | foo, bar |
;; | Descendant combinator | Yes | foo bar |
;; | Child combinator | Yes | foo>bar |
;; | Adjacent sibling combinator | No | foo+bar |
;; | General sibling combinator | No | foo~bar |
;; | Universal selector | Yes | * |
;; | Type selector | Yes | tag |
;; | ID selector | Yes | #foo |
;; | Class selector | Yes | .foo |
;; | Attribute selector | Yes | [foo] |
;; | Exact match attribute selector | Yes | [foo=bar] |
;; | Prefix match attribute selector | Yes | [foo^=bar] |
;; | Suffix match attribute selector | Yes | [foo$=bar] |
;; | Substring match attribute selector | Yes | [foo*=bar] |
;; | Include match attribute selector | Yes | [foo~=bar] |
;; | Dash match attribute selector | Yes | [foo|=bar] |
;; | Attribute selector modifiers | No | [foo=bar i] |
;; | Pseudo elements | No | ::foo |
;; | Pseudo classes | No | :foo |
;;; Code:
(require 'cl-lib)
;;; CSS selector parsing
;; https://www.w3.org/TR/selectors/#w3cselgrammar
;; https://www.w3.org/TR/selectors4/#grammar
;; https://www.w3.org/TR/2003/WD-css3-syntax-20030813/#detailed-grammar
;; https://www.w3.org/TR/2003/WD-css3-syntax-20030813/#tokenization
;; you might be wondering why I'm using both level 3 and 4 standards,
;; well, the level 3 one has a buggy lexer section whereas level 4
;; omits crucial parser definitions, so both have to be used...
;; TODO: support :not
(defvar esxml--css-selector-token-matchers
(let* ((h "[0-9a-f]")
(nl "\n\\|\r\n\\|\r\\|\f")
(nonascii "[\200-\U0010ffff]")
(unicode (format "\\\\%s\\{1,6\\}[ \t\r\n\f]?" h))
(escape (format "\\(?:%s\\)\\|\\\\[ -~\200-\U0010ffff]" unicode))
(nmstart (format "[a-z_]\\|%s\\|\\(?:%s\\)" nonascii escape))
(nmchar (format "[a-z0-9_-]\\|%s\\|\\(?:%s\\)" nonascii escape))
(num "[0-9]+\\|[0-9]*\\.[0-9]+")
(string1 (format "\"\\(?:[\t !#$%%&(-~]\\|\\\\\\(?:%s\\)\\|'\\|%s\\|\\(?:%s\\)\\)*\"" nl nonascii escape))
(string2 (format "'\\(?:[\t !#$%%&(-~]\\|\\\\\\(?:%s\\)\\|\"\\|%s\\|\\(?:%s\\)\\)*'" nl nonascii escape))
(ident (format "[-]?\\(?:%s\\)\\(?:%s\\)*" nmstart nmchar))
(unit (format "[-]?\\(?:%s\\)\\(?:%s\\)+" nmstart nmchar))
(name (format "\\(?:%s\\)+" nmchar)))
`((whitespace . "[ \t\r\n\f]+")
(string . ,(format "\\(?:%s\\|%s\\)" string1 string2))
(ident . ,ident)
(hash . ,(format "#%s" name))
(function . ,(format "%s(" ident))
(number . ,num)
(dimension . ,(format "\\(?:%s\\)%s" num unit))
(prefix-match . "\\^=")
(suffix-match . "\\$=")
(substring-match . "\\*=")
(include-match . "~=")
(dash-match . "|=")
(comma . ",")
(gt . ">")
(plus . "\\+")
(minus . "-")
(tilde . "~")
(asterisk . "\\*")
(period . "\\.")
(equals . "=")
(colon . ":")
(lbracket . "\\[")
(rbracket . "\\]")
(rparen . ")"))))
(defun esxml--tokenize-css-selector (string)
(let (result)
(with-temp-buffer
(insert string)
(goto-char (point-min))
(while (not (eobp))
(let ((max-length 0)
longest)
(dolist (matcher esxml--css-selector-token-matchers)
(let ((id (car matcher))
(re (cdr matcher)))
(when (looking-at re)
(let* ((token (match-string 0))
(length (length token)))
(when (> length max-length)
(setq max-length length)
(setq longest (cons id token)))))))
(when (not longest)
(error "Invalid token detected: %s"
(buffer-substring (point) (point-max))))
(push longest result)
(goto-char (+ (point) max-length)))))
(nreverse result)))
;; the alternative is creating a mutable object with peek/next methods
;; and passing it around, so I chose the one requiring less typing, a
;; dynamically bound variable :<
(defvar esxml--token-stream)
;; TODO: support :not
;; css-selector:
;; css-selector-list;
;; css-selector-list:
;; complex-css-selector [ comma whitespace* complex-css-selector ]*;
;; complex-css-selector:
;; compound-css-selector [ css-combinator compound-css-selector ]* whitespace*;
;; css-combinator:
;; whitespace+ | whitespace* [ '>' | '+' | '~' ] whitespace*;
;; compound-css-selector:
;; css-type-selector css-modifier* | css-modifier+;
;; css-type-selector:
;; IDENT | *;
;; css-modifier:
;; css-id | css-class | css-attrib | css-pseudo;
;; css-id:
;; HASH;
;; css-class:
;; '.' IDENT;
;; css-attrib:
;; '[' whitespace* css-attrib-name ']'
;; | '[' whitespace* css-attrib-name css-attrib-match css-attrib-value whitespace* ']';
;; css-attrib-name:
;; IDENT whitespace*;
;; css-attrib-match:
;; [ '=' | PREFIX-MATCH | SUFFIX-MATCH | SUBSTRING-MATCH | INCLUDE-MATCH | DASH-MATCH ] whitespace*;
;; css-attrib-value:
;; IDENT | STRING;
;; css-pseudo:
;; ':' ':'? [ IDENT | css-functional-pseudo ];
;; css-functional-pseudo:
;; FUNCTION whitespace* [ css-expression whitespace* ]+ ')';
;; css-expression:
;; '+' | '-' | DIMENSION | NUMBER | STRING | IDENT
(defun esxml-query-css-escape (string)
"Returns escaped version of STRING for use in selectors.
The logic used here corresponds to the CSS.escape API as
specified in https://drafts.csswg.org/cssom/#the-css.escape()-method."
(let (chars)
(dotimes (i (length string))
(let* ((char (aref string i))
(unprintablep (or (and (>= char ?\u0001) (<= char ?\u001f))
(= char ?\u007f)))
(nonasciip (>= char ?\u0080))
(digitp (and (>= char ?\u0030) (<= char ?\u0039)))
(upperp (and (>= char ?\u0041) (<= char ?\u005a)))
(lowerp (and (>= char ?\u0061) (<= char ?\u007a))))
(cond
((= char ?\u0000)
(push ?\ufffd chars))
(unprintablep
(dolist (char (string-to-list (format "\\%x " char)))
(push char chars)))
((and (= i 0) digitp)
(dolist (char (string-to-list (format "\\%x " char)))
(push char chars)))
((and (= i 1) digitp (= (aref string 0) ?-))
(dolist (char (string-to-list (format "\\%x " char)))
(push char chars)))
((and (= i 0) (= char ?-) (= (length string) 1))
(push ?\\ chars)
(push char chars))
((or nonasciip (= char ?-) (= char ?_) digitp upperp lowerp)
(push char chars))
(t
(push ?\\ chars)
(push char chars)))))
(concat (nreverse chars))))
(defun esxml--parse-css-identifier (string)
;; https://www.w3.org/TR/css-syntax-3/#consume-string-token
(let* ((code-points (string-to-list string))
chars
token)
(while code-points
(let ((char (pop code-points)))
(if (= char ?\\)
(let ((char (pop code-points)))
(cond
((not char))
((= char ?\n))
((or (and (>= char ?0) (<= char ?9))
(and (>= char ?a) (<= char ?f))
(and (>= char ?A) (<= char ?F)))
(let ((i 0)
(hex-chars (list char)))
(while (and (< i 5) code-points)
(let ((char (car code-points)))
(if (or (and (>= char ?0) (<= char ?9))
(and (>= char ?a) (<= char ?f))
(and (>= char ?A) (<= char ?F)))
(push (pop code-points) hex-chars)
(setq i 5)))
(setq i (1+ i)))
(let ((char (car code-points)))
(when (and char (= char ?\s))
(pop code-points)))
(let* ((hex-token (concat (nreverse hex-chars)))
(code-point (string-to-number hex-token 16)))
(if (or (zerop code-point)
(and (>= code-point ?\ud800) (<= code-point ?\udfff))
(> code-point ?\U0010ffff))
(push ?\ufffd chars)
(push code-point chars)))))
(t ; unspecified: non-hex digit
(push char chars))))
(push char chars))))
(concat (nreverse chars))))
(defun esxml--parse-css-string-literal (string)
(esxml--parse-css-identifier (substring string 1 -1)))
(defmacro esxml--with-parse-shorthands (&rest body)
`(cl-macrolet ((peek () '(car esxml--token-stream))
(next () '(pop esxml--token-stream))
(accept (type) `(and (peek) (eq (car (peek)) ,type)
(cdr (next))))
(eat-whitespace () '(while (accept 'whitespace))))
,@body))
(def-edebug-spec esxml--with-parse-shorthands (body))
(defun esxml-parse-css-selector (string)
"Parse CSS selector STRING into a list of alists.
Each alist represents a complex CSS selector. The result can be
passed to `esxml-query' and `esxml-query-all' as the selector
argument."
(let* ((esxml--token-stream (esxml--tokenize-css-selector string))
(result (esxml--parse-css-selector-list)))
(when esxml--token-stream
(error "Trailing garbage: %s"
(mapconcat 'cdr esxml--token-stream "")))
result))
(defun esxml--parse-css-selector-list ()
(esxml--with-parse-shorthands
(let ((first (esxml--parse-complex-css-selector))
result)
(when (not first)
(error "Expected at least one selector"))
(push first result)
(while (accept 'comma)
(eat-whitespace)
(let ((selector (esxml--parse-complex-css-selector)))
(when (not selector)
(error "Expected selector after comma"))
(push selector result)))
(nreverse result))))
(defun esxml--parse-complex-css-selector ()
(esxml--with-parse-shorthands
(let ((first (esxml--parse-compound-css-selector))
result done)
(when first
(push first result)
(while (not done)
(let ((combinator (esxml--parse-css-combinator)))
(if combinator
(let ((compound (esxml--parse-compound-css-selector)))
(cond
(compound
(setq result (append (list compound combinator) result)))
;; allow whitespace before comma
((not (eq (car (peek)) 'comma))
(error "Trailing combinator"))))
(setq done t))))
(nreverse result)))))
(defun esxml--parse-css-combinator ()
(esxml--with-parse-shorthands
;; NOTE: whitespace-surrounded combinators are distinguished from
;; whitespace-only ones by checking whether there has been
;; whitespace followed by a non-blank combinator
(let ((leading-whitespace-p (eq (car (peek)) 'whitespace))
result)
(eat-whitespace)
(let ((type (car (peek))))
(cond
((member type '(gt plus tilde))
(next)
(cond
((eq type 'gt)
(setq result '((combinator . child))))
((eq type 'plus)
(setq result '((combinator . direct-sibling))))
((eq type 'tilde)
(setq result '((combinator . indirect-sibling)))))
(eat-whitespace))
(leading-whitespace-p
(setq result '((combinator . descendant))))
(t nil)))
result)))
(defun esxml--parse-compound-css-selector ()
(esxml--with-parse-shorthands
(let ((type-selector (esxml--parse-css-type-selector))
done
result)
;; NOTE: css-type-selector css-modifier* | css-modifier+; is
;; equivalent to: [ css-type-selector | css-modifier ] css-modifier*;
(if type-selector
(push type-selector result)
(let ((modifier (esxml--parse-css-modifier)))
(if modifier
(push modifier result)
;; NOTE: this allows the trailing combinator error to be thrown
(setq done t))))
(while (not done)
(let ((modifier (esxml--parse-css-modifier)))
(if modifier
(push modifier result)
(setq done t))))
(when (> (cl-count 'id result :key 'car) 1)
(error "Only one id selector allowed per compound"))
(nreverse result))))
(defun esxml--parse-css-type-selector ()
(esxml--with-parse-shorthands
(let ((token (peek)))
(cond
((eq (car token) 'ident)
(next)
(cons 'tag (intern (esxml--parse-css-identifier (cdr token)))))
((eq (car token) 'asterisk)
(next)
'(wildcard))
(t nil)))))
(defun esxml--parse-css-modifier ()
(or (esxml--parse-css-id)
(esxml--parse-css-class)
(esxml--parse-css-attrib)
(esxml--parse-css-pseudo)))
(defun esxml--parse-css-id ()
(esxml--with-parse-shorthands
(let ((value (accept 'hash)))
(when value
(cons 'id (substring value 1))))))
(defun esxml--parse-css-class ()
(esxml--with-parse-shorthands
(when (accept 'period)
(let ((value (accept 'ident)))
(if value
(cons 'class value)
(error "Expected identifier after period"))))))
(defun esxml--parse-css-attrib ()
(esxml--with-parse-shorthands
(let (result)
(when (accept 'lbracket)
(eat-whitespace)
(let ((name (esxml--parse-css-attrib-name)))
(when (not name)
(error "Expected attribute name"))
(push (cons 'name (esxml--parse-css-identifier name)) result)
(when (not (accept 'rbracket))
(let ((match (esxml--parse-css-attrib-match)))
(when (not match)
(error "Expected attribute matcher"))
(let ((value (esxml--parse-css-attrib-value)))
(when (not value)
(error "Expected attribute value"))
(eat-whitespace)
(when (not (accept 'rbracket))
(error "Unterminated attribute"))
(push (cons match value) result)))))
(cons 'attribute (nreverse result))))))
(defun esxml--parse-css-attrib-name ()
(esxml--with-parse-shorthands
(let ((name (accept 'ident)))
(when name
(eat-whitespace)
name))))
(defun esxml--parse-css-attrib-match ()
(esxml--with-parse-shorthands
(let (result)
(cond
((accept 'equals)
(setq result 'exact-match))
((accept 'prefix-match)
(setq result 'prefix-match))
((accept 'suffix-match)
(setq result 'suffix-match))
((accept 'substring-match)
(setq result 'substring-match))
((accept 'include-match)
(setq result 'include-match))
((accept 'dash-match)
(setq result 'dash-match)))
(eat-whitespace)
result)))
(defun esxml--parse-css-attrib-value ()
(esxml--with-parse-shorthands
(let ((token (peek)))
(cond
((eq (car token) 'ident)
(next)
(esxml--parse-css-identifier (cdr token)))
((eq (car token) 'string)
(next)
(esxml--parse-css-string-literal (cdr token)))
(t nil)))))
(defun esxml--parse-css-pseudo ()
(esxml--with-parse-shorthands
(let (result type)
(when (accept 'colon)
(if (accept 'colon)
(setq type 'pseudo-element)
(setq type 'pseudo-class))
(let ((functional (esxml--parse-css-functional-pseudo)))
(if functional
(if (eq type 'pseudo-class)
(let ((value (car functional))
(args (cdr functional)))
(push (cons 'name (esxml--parse-css-identifier value)) result)
(push (cons 'args args) result))
(error "Pseudo-elements may not have arguments"))
(let ((value (accept 'ident)))
(if value
(push (cons 'name (esxml--parse-css-identifier value)) result)
(error "Expected function or identifier")))))
(cons type (nreverse result))))))
(defun esxml--parse-css-functional-pseudo ()
(esxml--with-parse-shorthands
(let ((function (accept 'function))
result)
(when function
(push (substring function 0 -1) result)
(eat-whitespace)
(let ((expression (esxml--parse-css-expression))
done)
(eat-whitespace)
(when (not expression)
(error "Expected at least one expression for function"))
(push expression result)
(while (not done)
(setq expression (esxml--parse-css-expression))
(if expression
(progn
(push expression result)
(eat-whitespace))
(setq done t))))
(when (not (accept 'rparen))
(error "Unterminated function argument list"))
(nreverse result)))))
(defun esxml--parse-css-expression ()
(esxml--with-parse-shorthands
(let ((token (peek)))
(cond
((accept 'plus)
'(operator . +))
((accept 'minus)
'(operator . -))
((eq (car token) 'dimension)
(next)
(cons 'dimension (esxml--parse-css-identifier (cdr token))))
((eq (car token) 'number)
(next)
(cons 'number (string-to-number (cdr token))))
((eq (car token) 'string)
(next)
(cons 'string (esxml--parse-css-string-literal (cdr token))))
((eq (car token) 'ident)
(next)
(cons 'ident (esxml--parse-css-identifier (cdr token))))
(t nil)))))
;;; tree traversal
;; TODO: these helpers should be part of esxml.el
(defun esxml-branch-p (node)
"Non-nil if NODE refers to an esxml branch."
(and (listp node)
(>= (length node) 2)
(symbolp (car node))
(listp (cadr node))))
(defun esxml-node-tag (node)
"Returns the tag of NODE if available."
(and (esxml-branch-p node)
(car node)))
(defun esxml-node-attributes (node)
"Returns the attributes of NODE if available."
(and (esxml-branch-p node)
(cadr node)))
(defun esxml-node-attribute (attribute node)
"Returns the attribute ATTRIBUTE of NODE if available."
(and (esxml-branch-p node)
(cdr (assq attribute (cadr node)))))
(defun esxml-node-children (node)
"Returns the children of NODE if available."
(and (esxml-branch-p node)
(nthcdr 2 node)))
(defun esxml-find-node (pred root)
"Locates a node satisfying PRED starting from ROOT.
Returns the node or nil if none found."
(if (funcall pred root)
root
(cl-some (lambda (node) (esxml-find-node pred node))
(esxml-node-children root))))
(defun esxml-visit-nodes (function root)
"Visit nodes by calling FUNCTION on each starting from ROOT."
(funcall function root)
(mapc (lambda (node) (esxml-visit-nodes function node))
(esxml-node-children root)))
(defun esxml-find-nodes (pred root)
"Locates all nodes satisfying PRED starting from ROOT.
Returns a list of the nodes or nil if none found."
(let ((acc '()))
(esxml-visit-nodes
(lambda (node)
(when (funcall pred node)
(push node acc)))
root)
(nreverse acc)))
(defun esxml-find-descendant (pred root)
"Locates a node satisfying PRED starting from ROOT's children.
Returns the node or nil if none found."
(cl-some (lambda (node) (esxml-find-node pred node))
(esxml-node-children root)))
(defun esxml-find-descendants (pred root)
"Locates all nodes satisfying PRED starting from ROOT's children.
Returns a list of the nodes or nil if none found."
(cl-mapcan (lambda (node) (esxml-find-nodes pred node))
(esxml-node-children root)))
(defun esxml-find-child (pred root)
"Locates a node satisfying PRED among ROOT's children.
Returns the node or nil if none found."
(cl-some (lambda (node) (when (funcall pred node) node))
(esxml-node-children root)))
(defun esxml-find-children (pred root)
"Locates all nodes satisfying PRED among ROOT's children.
Returns a list of the nodes or nil if none found."
(mapcar (lambda (node) (when (funcall pred node) node))
(esxml-node-children root)))
(defun esxml--node-with-children (node children)
(let ((tag (esxml-node-tag node))
(attributes (esxml-node-attributes node)))
(append (list tag attributes) children)))
(defun esxml--node-with-attributes (node attributes)
(let ((tag (esxml-node-tag node))
(children (esxml-node-children node)))
(append (list tag attributes) children)))
(defun esxml-tree-map (function root)
"Returns a copy of ROOT with FUNCTION applied to each node."
(if (esxml-branch-p root)
(esxml--node-with-children
(funcall function root)
(mapcar (lambda (node) (esxml-tree-map function node))
(esxml-node-children root)))
(funcall function root)))
(defvar esxml--symbol (make-symbol "id"))
(defun esxml--decorate-tree (root)
(let ((i 0))
(esxml-tree-map
(lambda (node)
(let ((attribute (cons esxml--symbol i))
(attributes (esxml-node-attributes node)))
(setq attributes (append (list attribute) attributes))
(setq i (1+ i))
(if (esxml-branch-p node)
(esxml--node-with-attributes node attributes)
node)))
root)))
(defun esxml--undecorate-node (node)
(if (esxml-branch-p node)
(let ((attributes (esxml-node-attributes node)))
(esxml--node-with-attributes node (assq-delete-all esxml--symbol
attributes)))
node))
(defun esxml--retrieve-decoration (node)
(esxml-node-attribute esxml--symbol node))
;;; querying
;; NOTE: supporting structural pseudo functions, direct siblings and
;; indirect siblings requires breadth instead of depth traversal,
;; something that could be emulated without zippers if you had the
;; parent of the node (and the position of the child)...
(defun esxml--node-matches-attribute-p (node modifier)
(let ((attributes (esxml-node-attributes node))
haystack)
(cl-every
(lambda (item)
(let ((type (car item))
(value (cdr item)))
(cond
((eq type 'name)
(let ((match (assq (intern value) attributes)))
(setq haystack (cdr match))
match))
((eq type 'exact-match)
(equal haystack value))
((eq type 'prefix-match)
(string-prefix-p value haystack))
((eq type 'suffix-match)
(string-suffix-p value haystack))
((eq type 'substring-match)
(string-match-p (regexp-quote value) haystack))
((eq type 'include-match)
(member value (split-string haystack " ")))
((eq type 'dash-match)
(or (equal value haystack)
(string-match-p (format "^%s-" (regexp-quote value)) haystack)))
(t (error "Unknown attribute modifier")))))
modifier)))
(defun esxml--node-matches-modifier-p (node type value)
(cond
((eq type 'wildcard)
t)
((eq type 'tag)
(equal (esxml-node-tag node) value))
((eq type 'id)
(equal (esxml-node-attribute 'id node) value))
((eq type 'class)
(let ((class (esxml-node-attribute 'class node)))
(and class (member value (split-string class " ")))))
((eq type 'attribute)
(esxml--node-matches-attribute-p node value))
;; TODO: support structural pseudo functions
;; TODO: error out on invalid pseudo-class arguments
(t (error "Unimplemented attribute type: %s" type))))
(defun esxml--find-node-for (attributes)
(lambda (node)
(cl-every
(lambda (attribute)
(let ((type (car attribute))
(value (cdr attribute)))
(esxml--node-matches-modifier-p node type value)))
attributes)))
(defun esxml--find-nodes (root combinator attributes)
(let* ((type (cdr (assq 'combinator combinator)))
(walker (cond
((not type)
'esxml-find-nodes)
((eq type 'descendant)
'esxml-find-descendants)
((eq type 'child)
'esxml-find-children)
;; TODO: support direct sibling
;; TODO: support indirect sibling
(t (error "Unimplemented combinator %s" combinator)))))
(funcall walker (esxml--find-node-for attributes) root)))
(defun esxml--query (selector root)
(let* ((attributes (pop selector))
combinator
(result (esxml--find-nodes root nil attributes)))
(while (and result selector)
(setq combinator (pop selector))
(setq attributes (pop selector))
(setq result (cl-mapcan
(lambda (node)
(esxml--find-nodes node combinator attributes))
result))
(setq result (delq nil result)))
result))
(defun esxml--delete-dups (items test)
(let ((seen (make-hash-table :test test))
result)
(while items
(let ((item (pop items)))
(when (not (gethash item seen))
(push item result)
(puthash item t seen))))
(nreverse result)))
(defun esxml-query-all (selector root)
"Locates all nodes satisfying SELECTOR starting from ROOT.
SELECTOR must be a string containing a CSS selector or a parsed
CSS selector returned by `esxml-parse-css-selector'. Returns a
list of the nodes or nil if none found."
(when (stringp selector)
(setq selector (esxml-parse-css-selector selector)))
(if (= (length selector) 1)
;; no commas, we can only get the same nodes repeatedly
(esxml--delete-dups (esxml--query (car selector) root) 'eq)
;; commas, nodes might be the same *and* in the wrong order
(setq root (esxml--decorate-tree root))
(let (result)
(while selector
(setq result (nconc result (esxml--query (pop selector) root))))
(setq result (cl-sort result '< :key 'esxml--retrieve-decoration))
(setq result (cl-delete-duplicates result :test '=
:key 'esxml--retrieve-decoration))
(mapcar (lambda (node) (esxml--undecorate-node node)) result))))
(defun esxml-query (selector root)
"Locates a node satisfying SELECTOR starting from ROOT.
SELECTOR must be a string containing a CSS selector or a parsed
CSS selector returned by `esxml-parse-css-selector'. Returns the
node or nil if none found."
;; NOTE: you can do a bit less work (the savings decrease the more
;; branches the query discards), but it's simpler and safer to just
;; have the same algorithm for both entry points
(car (esxml-query-all selector root)))
(provide 'esxml-query)
;;; esxml-query.el ends here