;;; ;;; Nominal Attribute Decision Tree Tools ;;; ;;; Decision trees are implemented here as trees of CONSes. Leaves ;;; are atoms which indicate the category label assigned to a given ;;; instance object. Internal nodes are lists, each containing the ;;; name of the nominal attribute to be examined at the node and a ;;; sequence of subtrees tagged with their corresponding attribute ;;; values. For example, this is the decision tree for Figure 18.8 ;;; of Russell & Norvig (1995): ;;; ;;; (Patrons ;;; (None . No) ;;; (Some . Yes) ;;; (Full . (Hungry ;;; (Yes . No) ;;; (No . (Type ;;; (French . Yes) ;;; (Italian . No) ;;; (Burger . Yes) ;;; (Thai . (Fri-Sat ;;; (No . No) ;;; (Yes. Yes)))))))) ;;; ;;; Example instances are implemented here as Lisp association lists, ;;; with attributes associated with their values. An example instance ;;; might be: ;;; ;;; ((Alternate . No) (Bar . Yes) (Fri-Sat . Yes) (Hungry . No) ;;; (Patrons . Full) (Price . 2) (Raining . Yes) (Reservation . No) ;;; (Type . French) (WaitEstimate . 10-30)) ;;; ;;; See the code, below, for details. ;;; ;;; DAVID NOELLE Wed May 15 12:38:07 1996 ;;; ;;; ;;; Package ;;; (cl::defpackage "DTREE" (:export "INSTANCE-ATTRIBUTE-VALUE" "IS-VALID-INSTANCE-P" "LABELED-INSTANCE-CATEGORY" "LABELED-INSTANCE-FEATURE-VALUES" "IS-VALID-LABELED-INSTANCE-P" "CONVERT-POSITION-INSTANCE-TO-STANDARD-FORMAT" "CONVERT-STANDARD-LABELED-INSTANCE-TO-POSITION-FORMAT" "CONVERT-POSITION-SET-TO-STANDARD-SET" "CONVERT-STANDARD-SET-TO-POSITION-SET" "ALL-ATTRIBUTES-OF-STANDARD-SET" "ALL-ATTRIBUTES-OF-POSITION-SET" "CATEGORIES-OF-LABELED-INSTANCES" "PARTITION-SET-BY-ATTRIBUTE-VALUE" "COUNT-INSTANCES-IN-CATEGORY" "LEAF-NODE-P" "NODE-ATTRIBUTE" "NODE-TAGGED-SUBTREES" "TAGGED-SUBTREE-TAG" "TAGGED-SUBTREE-SUBTREE" "NODE-ATTRIBUTE-VALUE-SUBTREE" "IS-VALID-DECISION-TREE-P" "*PRINT-DECISION-TREE-INDENT-SPACES*" "PRINT-DECISION-TREE" "CLASSIFY-INSTANCE" "CLASSIFY-LABELED-INSTANCES" "CROSS-VALIDATE" "CROSS-VALIDATE-AND-PRINT" "*ATTRIBUTE-LIST*" "ALTERNATE" "BAR" "FRI-SAT" "HUNGRY" "PATRONS" "PRICE" "RAINING" "RESERVATION" "TYPE" "WAITESTIMATE" "*EXAMPLES-BY-POSITION*" "YES" "NO" "SOME" "FULL" "NONE" "FRENCH" "THAI" "BURGER" "ITALIAN" "0-10" "10-30" "30-60" ">60" "*DECISION-TREE*" "CLASSIFY-RESTAURANT-SITUATIONS")) (cl::in-package "DTREE") ;;; ;;; Object Instance Tools ;;; ;;; ;;; These tools deal with two kinds of example instances: vanilla ;;; "instances" and "labeled instances". Regular instances are simply ;;; Lisp association lists, with attributes associated with attribute ;;; values. Labeled instances tack a category label onto the front of ;;; a regular instance using a single CONS cell. For example, a ;;; regular instance might be: ;;; ;;; ((big . yes) (charming . no) (hairy . yes) (cute . no)) ;;; ;;; ... and a labeled example might be something like ... ;;; ;;; (wumpus (big . yes) (charming . no) (hairy . yes) (cute . no)) ;;; ;;; instance-attribute-value -- Returns the attribute value for the given ;;; object and attribute. The object must be a thing which causes the ;;; "is-valid-instance-p" function, below, to return true. This function ;;; returns nil if the attribute is not defined for the object. (defmacro instance-attribute-value (object attribute) `(cdr (assoc ,attribute ,object))) ;;; is-valid-instance-p -- Returns non-nil iff the given "object" is ;;; a valid instance using only the atomic attributes given. The ;;; "all-attributes" argument should be an association list which ;;; associates every allowable attribute to a list of allowable values ;;; for that attribute. An example value for "all-attributes" would be: ;;; ;;; ((Alternate . (Yes No)) ;;; (Bar . (Yes No)) ;;; (Fri-Sat . (Yes No)) ;;; (Hungry . (Yes No)) ;;; (Patrons . (None Some Full)) ;;; (Price . (1 2 3)) ;;; (Raining . (Yes No)) ;;; (Reservation . (Yes No)) ;;; (Type . (French Italian Thai Burger)) ;;; (WaitEstimate . (0-10 10-30 30-60 >60))) ;;; ;;; Instances must include an attribute value for every allowable ;;; attribute in order to be valid! ;;; (defun is-valid-instance-p (object all-attributes) (declare (type list all-attributes)) (and (listp object) (loop for attribute in all-attributes always (member (instance-attribute-value object (car attribute)) (cdr attribute))) t)) ;;; labeled-instance-category -- Returns the category label of a ;;; labeled example instance. (defmacro labeled-instance-category (object) `(car ,object)) ;;; labeled-instance-feature-values -- Returns the unlabeled object ;;; portion of the given labeled instance. (defmacro labeled-instance-feature-values (object) `(cdr ,object)) ;;; is-valid-labeled-instance-p -- Returns non-nil iff the given ;;; object is a valid labeled instance, based on the given association ;;; list of attributes (see "is-valid-instance-p", above) and the ;;; given list of valid category labels. (defun is-valid-labeled-instance-p (object all-attributes all-categories) (declare (type list all-attributes all-categories)) (and (is-valid-instance-p (labeled-instance-feature-values object) all-attributes) (member (labeled-instance-category object) all-categories) t)) ;;; ;;; Labeled Instance Set Tools ;;; ;;; ;;; These tools are for managing collections of labeled instances. ;;; The sets are implemented as lists. There are two formats supported ;;; here. First, there is the standard format, for which every element ;;; of the set causes "is-valid-labeled-instance-p" to return true. ;;; The second format is called "position format" because the attribute ;;; names are not stored in the instances, but attribute values are ;;; ordered in each instance, instead. The category label still appears ;;; as the first element of a "position format" instance. Most of the ;;; utilities in this file require instances to be in standard format, ;;; but there are some functions, below, which convert "position format" ;;; instances into standard format. ;;; ;;; convert-position-instance-to-standard-format -- Does exactly ;;; what the name says. The given association list of attribute ;;; names must be in the same order as attribute values in the input ;;; "position format" instance. The "all-attributes" argument should be ;;; an association list which associates every allowable attribute to a ;;; list of allowable values for that attribute, as in the function ;;; "is-valid-instance-p", above. (defun convert-position-instance-to-standard-format (object all-attributes) (declare (type list object all-attributes)) (cons (car object) (loop for attribute-description in all-attributes as value in (cdr object) nconc (if (member value (cdr attribute-description)) (list (cons (car attribute-description) value)) (progn (format *error-output* "~&ERROR: ~S is not a valid value for attribute ~S." value (car attribute-description)) nil))))) ;;; convert-standard-labeled-instance-to-position-format -- Does exactly ;;; what the name says. This is the inverse of the previous function. ;;; The second argument, "all-attributes", is the same as above. (defun convert-standard-labeled-instance-to-position-format (object all-attributes) (declare (type list object all-attributes)) (cons (car object) (loop for attribute-description in all-attributes collect (let ((value (instance-attribute-value object (car attribute-description)))) (declare (type atom value)) (if (member value (cdr attribute-description)) value (progn (format *error-output* "~&ERROR: ~S is not a valid value for attribute ~S." value (car attribute-description)) nil)))))) ;;; convert-position-set-to-standard-set -- Convert a set of "position ;;; format" labeled instances into a set of standard format labeled ;;; instances. The "all-attributes" argument is as in the functions above. (defun convert-position-set-to-standard-set (position-set all-attributes) (declare (type list position-set all-attributes)) (loop for object in position-set collect (convert-position-instance-to-standard-format object all-attributes))) ;;; convert-standard-set-to-position-set -- Convert a set of standard ;;; format labeled instances into a set of "position format" labeled ;;; instances. The "all-attributes" argument is as in the functions above. (defun convert-standard-set-to-position-set (standard-set all-attributes) (declare (type list standard-set all-attributes)) (loop for object in standard-set collect (convert-standard-labeled-instance-to-position-format object all-attributes))) ;;; add-value-to-alist -- This is a utility function used in some of ;;; the functions, below. It takes an association list as an argument ;;; and adds the given "value" to the list of values associated with the ;;; given "attribute". The function returns the updated alist. (defun add-value-to-alist (alist attribute value) (declare (type list alist) (type symbol attribute) (type atom value)) (let ((binding (assoc attribute alist))) (declare (type list binding)) (if binding (pushnew value (cdr binding)) (setq alist (acons attribute (list value) alist))) alist)) ;;; all-attributes-of-standard-set -- Given a set of labeled instances ;;; in standard format, return an association list which associates all ;;; symbolic attribute names appearing in the set with all of the ;;; corresponding values. In essence, this function extracts an ;;; "all-attributes" association list from a set of labeled instances. (defun all-attributes-of-standard-set (set &optional (old-attributes nil)) (declare (type list set)) (let ((all-attributes (copy-tree old-attributes))) (declare (type list all-attributes)) (loop for object in set do (loop for binding in (cdr object) do (setq all-attributes (add-value-to-alist all-attributes (car binding) (cdr binding))))) ;; Reverse these, just in case the order makes a difference ... (nreverse all-attributes))) ;;; all-attributes-of-position-set -- This is like the previous function, ;;; only it takes a "position format" instance set as input. Since such ;;; "position format" instances do not contain attribute names, this ;;; function requires a second argument -- an ordered list of attribute ;;; names. (defun all-attributes-of-position-set (set attribute-names &optional (old-attributes nil)) (declare (type list set attribute-names)) (let ((all-attributes (copy-tree old-attributes))) (declare (type list all-attributes)) (loop for object in set do (loop for value in (cdr object) as attribute in attribute-names do (setq all-attributes (add-value-to-alist all-attributes attribute value)))) ;; Need to reverse this to keep attributes in order ... (nreverse all-attributes))) ;;; categories-of-labeled-instances -- Given a set of labeled instances ;;; in either format, return a list of all of the category labels used ;;; anywhere in the set. (defun categories-of-labeled-instances (set) (declare (type list set)) (remove-duplicates (mapcar #'car set))) ;;; partition-set-by-attribute-value -- Given a set of labeled instances ;;; in standard format, partition the set into subsets based on the value ;;; of the given attribute. Return an association list which associates ;;; the various nominal values for this attribute with the corresponding ;;; subset of labeled instances. The returned CONS cells are fresh. (defun partition-set-by-attribute-value (set attribute) (declare (type list set) (type symbol attribute)) (let ((association-list nil)) (declare (type list association-list)) (loop for object in set do (setq association-list (add-value-to-alist association-list (instance-attribute-value (labeled-instance-feature-values object) attribute) (copy-tree object)))) association-list)) ;;; count-instances-in-category -- Return the number of instances in ;;; the given set of labeled instances (in either format) which have ;;; the given category label assigned to them. (defun count-instances-in-category (set label) (declare (type list set) (type atom label)) (loop for object in set count (eql (labeled-instance-category object) label))) ;;; ;;; Decision Tree Utilization Tools ;;; ;;; leaf-node-p -- Returns non-nil if the given "node" could be ;;; the leaf of a decision tree. (defmacro leaf-node-p (node) `(atom ,node)) ;;; node-attribute -- Returns the name of the attribute to be ;;; examined at the given internal "node" of a decision tree. (defmacro node-attribute (node) `(when (consp ,node) (first ,node))) ;;; node-subtrees -- Returns a list of subtrees accessible from ;;; this node, each tagged with their appropriate attribute value. ;;; Note that the CONS cells are *not* fresh, so they should not be ;;; modified. (defmacro node-tagged-subtrees (node) `(when (consp ,node) (rest ,node))) ;;; tagged-subtree-tag -- Returns the attribute value tag associated ;;; with a tagged subtree. (defmacro tagged-subtree-tag (tagged-subtree) `(when (consp ,tagged-subtree) (car ,tagged-subtree))) ;;; tagged-subtree-subtree -- Returns the actual subtree of a tagged ;;; subtree. (defmacro tagged-subtree-subtree (tagged-subtree) `(when (consp ,tagged-subtree) (cdr ,tagged-subtree))) ;;; node-attribute-value-subtree -- Returns the subtree of the given ;;; internal node corresponding to the given attribute value. (defmacro node-attribute-value-subtree (node value) `(when (listp ,node) (cdr (assoc ,value (node-tagged-subtrees ,node))))) ;;; is-valid-decision-tree-p -- Returns non-nil iff the given "tree" ;;; is a valid decision tree over the space of attributes provided ;;; in the association list given by "all-attributes" and over the space ;;; of symbolic category labels given in the list "all-categories". ;;; The "all-attributes" argument should be an association list which ;;; associates every allowable attribute to a list of allowable values ;;; for that attribute, as in "is-valid-instance-p", above. (defun is-valid-decision-tree-p (tree all-attributes all-categories) (declare (type list all-attributes all-categories)) (if (leaf-node-p tree) ;; Tree might be only a category label leaf ... (member tree all-categories) ;; Tree might be an internal node ... (let* ((attribute (node-attribute tree)) (allowable-values (when attribute (cdr (assoc attribute all-attributes))))) (and attribute (symbolp attribute) ;; This implicitly checks if the attribute name is valid ... allowable-values (listp allowable-values) ;; Make sure that there is a branch for all allowable values ... (null (set-difference allowable-values (mapcar #'(lambda (tagged-subtree) (tagged-subtree-tag tagged-subtree)) (node-tagged-subtrees tree)))) ;; Make sure that all tagged subtrees are valid ... (loop for tagged-subtree in (node-tagged-subtrees tree) always (is-valid-decision-tree-p (tagged-subtree-subtree tagged-subtree) all-attributes all-categories)))))) ;;; Number of spaces in each indentation when printing a decision tree ... (defvar *print-decision-tree-indent-spaces* 2) ;;; print-decision-tree -- Outputs a decision tree as nested "CASE" ;;; statements. Output goes to the given "stream". (defun print-decision-tree (decision-tree &optional (stream t) (indent 0)) (declare (type fixnum indent)) (flet ((spaces (n &optional (stream t)) (declare (type fixnum n)) ;; Output "n" leading spaces to "stream" ... (format stream "~&") (dotimes (i n) (format stream " ")))) (declare (type function indent)) (if (leaf-node-p decision-tree) (progn (spaces indent stream) (format stream "Category Label = ~S" decision-tree)) (progn (spaces indent stream) (format stream "If Attribute ~S Is:" (node-attribute decision-tree)) (incf indent *print-decision-tree-indent-spaces*) (loop for tagged-subtree in (node-tagged-subtrees decision-tree) do (spaces indent stream) (format stream "~S Then" (tagged-subtree-tag tagged-subtree)) (print-decision-tree (tagged-subtree-subtree tagged-subtree) stream (+ indent *print-decision-tree-indent-spaces*)))))) stream) ;;; classify-instance -- Provide a category label for the given ;;; unlabeled instance object, given a decision tree. The ;;; "decision-tree" must be a tree for which "is-valid-decision-tree-p" ;;; returns true, and "object" must be an association list for which ;;; "is-valid-instance-p" returns true. (defun classify-instance (decision-tree object) (declare (type list object)) (if (leaf-node-p decision-tree) ;; We've reached a leaf -- return the category label ... decision-tree ;; Branch on the appropriate attribute value of this internal node ... (classify-instance (node-attribute-value-subtree decision-tree (instance-attribute-value object (node-attribute decision-tree))) object))) ;;; classify-labeled-instances -- Given a decision tree and a set of ;;; labeled instances (in standard format), return a copy of the set ;;; with classification labels determined by the decision tree. (defun classify-labeled-instances (decision-tree instance-set) (declare (type list instance-set)) (loop for labeled-object in instance-set collect (let ((object (labeled-instance-feature-values labeled-object))) (declare (type list object)) (cons (classify-instance decision-tree object) (copy-tree object))))) ;;; cross-validate -- Performs a leave-one-out cross validation test ;;; on a given decision tree induction function. The first required ;;; argument should be a decision tree learning function which takes ;;; three required arguments: a standard set of labeled instances, ;;; an "all-attributes" association list (like that passed to ;;; "is-valid-instance-p", above), and a default category label. This ;;; learning function should return an induced decision tree. The ;;; second argument to "cross-validate" should be a standard set of ;;; labeled instances. The third and fourth arguments are the attribute ;;; descriptions and the default category label which should be passed ;;; along to the decision tree learner. This function returns two ;;; values -- the number correct and the number of instances in the ;;; labeled instance set. (defun cross-validate (induction-function instance-set all-attributes default-category) (declare (type function induction-function) (type list instance-set all-attributes) (type symbol default-category)) (let ((number-correct 0) (remaining-instances nil)) (declare (type fixnum number-correct) (type list remaining-instances)) (loop for labeled-object in instance-set do (setq remaining-instances (remove labeled-object instance-set :count 1)) (when (eql (labeled-instance-category labeled-object) (classify-instance (funcall induction-function remaining-instances all-attributes default-category) (labeled-instance-feature-values labeled-object))) (incf number-correct))) (values number-correct (list-length instance-set)))) ;;; cross-validate-and-print -- Performs a leave-one-out cross validation ;;; test on a given decision tree induction function and prints the ;;; resulting cross validation error as a percentage. This real value ;;; is also returned by the function. Arguments are the same as those to ;;; "cross-validate", above. (defun cross-validate-and-print (induction-function instance-set all-attributes default-category &optional (stream t)) (declare (type function induction-function) (type list instance-set all-attributes) (type symbol default-category)) (multiple-value-bind (correct total) (cross-validate induction-function instance-set all-attributes default-category) (declare (type fixnum correct total)) (let ((percent (float (* 100 (/ correct total))))) (declare (type float percent)) (format t "~&Cross Validation Accuracy = ~6,2F~&" percent) percent))) ;;; ;;; Demonstrate Tools ;;; ;;; ;;; We demonstrate these tools using the example from page 531-537 of ;;; Russell & Norvig (1995). ;;; ;;; The attributes from page 532 ... (defvar *attribute-list* '(Alternate Bar Fri-Sat Hungry Patrons Price Raining Reservation Type WaitEstimate)) ;;; The examples from Figure 18.5 ... (defvar *examples-by-position* '((Yes . (Yes No No Yes Some 3 No Yes French 0-10 )) (No . (Yes No No Yes Full 1 No No Thai 30-60)) (Yes . (No Yes No No Some 1 No No Burger 0-10 )) (Yes . (Yes No Yes Yes Full 1 No No Thai 10-30)) (No . (Yes No Yes No Full 3 No Yes French >60 )) (Yes . (No Yes No Yes Some 2 Yes Yes Italian 0-10 )) (No . (No Yes No No None 1 Yes No Burger 0-10 )) (Yes . (No No No Yes Some 2 Yes Yes Thai 0-10 )) (No . (No Yes Yes No Full 1 Yes No Burger >60 )) (No . (Yes Yes Yes Yes Full 3 No Yes Italian 10-30)) (No . (No No No No None 1 No No Thai 0-10 )) (Yes . (Yes Yes Yes Yes Full 1 No No Burger 30-60)))) ;;; The decision tree in Figure 18.8 ... (defvar *decision-tree* '(Patrons (None . No) (Some . Yes) (Full . (Hungry (Yes . No) (No . (Type (French . Yes) (Italian . No) (Burger . Yes) (Thai . (Fri-Sat (No . No) (Yes . Yes))))))))) ;;; classify-restaurant-situations -- Given a decision tree and ;;; a set of "position format" labeled instances, classify each ;;; instance according to the decision tree and output the result. ;;; The third required argument is an ordered list of attribute names. (defun classify-restaurant-situations (&optional (decision-tree *decision-tree*) (position-set *examples-by-position*) (attributes *attribute-list*)) (declare (type list position-set attributes)) (let* ((attribute-descriptions (all-attributes-of-position-set position-set attributes)) (category-labels (categories-of-labeled-instances position-set)) (standard-set (convert-position-set-to-standard-set position-set attribute-descriptions))) (declare (type list attribute-descriptions category-labels standard-set)) (if (not (is-valid-decision-tree-p decision-tree attribute-descriptions category-labels)) (progn (format t "~&ERROR: Decision tree is not valid for this instance set.~&") nil) (let ((solution-set (classify-labeled-instances decision-tree standard-set)) (number-correct 0)) (declare (type list solution-set) (type fixnum number-correct)) (format t "~&~%DECISION TREE:~&~%") (print-decision-tree decision-tree) (format t "~&~%DECISIONS:~&~%") (loop for original-object in standard-set as solution-object in solution-set do (format t "~&~% For Instance ~S" (labeled-instance-feature-values original-object)) (format t "~& Labeled ~S, While Decision Tree Says ~S." (labeled-instance-category original-object) (labeled-instance-category solution-object)) (if (eql (labeled-instance-category original-object) (labeled-instance-category solution-object)) (progn (incf number-correct) (format t " (MATCH!)")) (format t " (ERROR!)"))) (format t "~&~%GOT ~D CORRECT OUT OF ~D." number-correct (list-length standard-set)) (format t "~&~%DONE.~&~%") nil))))