((section 2 "Outdated egg!" (p "This is an egg for CHICKEN 4, the unsupported old release.  You're almost certainly looking for " (int-link "/eggref/5/dataset-utils" "the CHICKEN 5 version of this egg") ", if it exists.") (p "If it does not exist, there may be equivalent functionality provided by another egg; have a look at the " (link "https://wiki.call-cc.org/chicken-projects/egg-index-5.html" "egg index") ". Otherwise, please consider porting this egg to the current version of CHICKEN.")) (section 2 "Dataset Utilities" (p "A set of routines to load and manage datasets for machine learning / data mining tasks.") (p "A dataset is a table:") (hr) (table (tr (th "Outlook") (th "Temperature") (th "Humidity") (th "Windy") (th "Plays")) "\n" (tr (td "sunny") (td "hot") (td "high") (td "false") (td "no")) "\n" (tr (td "sunny") (td "hot") (td "high") (td "true") (td "no"))) (hr) (p "Each column in the table is an " (i "attribute") ", and each row is an " (i "instance") ".  Instances have values for each attribute.  The whole table is called a " (i "relation") ", and can be given a name.") (section 3 "Exported Procedures" (section 4 "Creating datasets" (def (sig (procedure "(make-nominal-attribute name value-1 ...)" (id make-nominal-attribute))) (p "Creates a nominal attribute with given values, e.g.:") (pre "> (make-nominal-attribute 'outlook 'sunny 'overcast 'rainy)")) (def (sig (procedure "(make-numeric-attribute name)" (id make-numeric-attribute))) (p "Creates a numeric attribute, e.g.:") (pre "> (make-numeric-attribute 'temperature)")) (def (sig (procedure "(make-relation name attributes data)" (id make-relation))) (p "Creates a relation with given " (tt "name") ".  
The " (tt "attributes") " must be a list of attribute instances, and the " (tt "data") " must be a list of lists, where each sublist represents one instance and gives that instance's value for every attribute.") (pre "> (make-relation 'plays-tennis\n                  (list (make-nominal-attribute 'outlook 'sunny 'overcast 'rainy)\n                        (make-nominal-attribute 'temperature 'hot 'mild 'cool)\n                        (make-nominal-attribute 'humidity 'high 'normal)\n                        (make-nominal-attribute 'windy 'true 'false)\n                        (make-nominal-attribute 'plays 'yes 'no))\n                  '((sunny hot high false no)\n                    (sunny hot high true no)\n                    (overcast hot high false yes)\n                    ...\n                    (rainy mild high true no)))"))) (section 4 "Managing datasets" (def (sig (procedure "(attribute-name attribute)" (id attribute-name))) (p "Returns the name of the given attribute.")) (def (sig (procedure "(attribute-definition attribute)" (id attribute-definition))) (p "Returns a definition of the type of the given attribute.  This definition will be one of:") (ul (li (tt "'(numeric)") " for numeric attributes") (li (tt "'(nominal value-1 ...)") " for nominal attributes, listing the possible values"))) (def (sig (procedure "(class-probability relation attribute-name value)" (id class-probability))) (p "Returns the proportion of instances with the given attribute value.")) (def (sig (procedure "(entropy relation attribute-name)" (id entropy))) (p "Computes the entropy of the given relation, using " (tt "attribute-name") " to divide the relation into groups.  
" (tt "attribute-name") " should be a nominal attribute.")) (def (sig (procedure "(filter-instances relation attribute-name value)" (id filter-instances))) (p "Returns a new relation containing those instances of relation which have the given value for attribute-name.")) (def (sig (procedure "(find-attribute-index relation attribute-name)" (id find-attribute-index))) (p "Returns the index number of the given attribute name in relation.")) (def (sig (procedure "(get-attribute-values relation attribute-name)" (id get-attribute-values))) (p "Returns the values taken by instances in relation for the given attribute name.")) (def (sig (procedure "(information-gain relation target-class attribute-name)" (id information-gain))) (p "Computes the information gain obtained by using the given " (tt "attribute-name") " to split the data in " (tt "relation") ", relative to the entropy of the unsplit data; " (tt "target-class") " is used to compute the entropy.")) (def (sig (procedure "(relation-attributes relation)" (id relation-attributes))) (p "Returns a list of attributes for the given relation.")) (def (sig (procedure "(relation-data relation)" (id relation-data))) (p "Returns a list of the instances in the given relation.")) (def (sig (procedure "(relation-name relation)" (id relation-name))) (p "Returns the name of the given relation.")) (def (sig (procedure "(split-instances relation attribute-name)" (id split-instances))) (p "Given a nominal attribute, returns a list of relations, each representing instances in " (tt "relation") " with the same value for the given " (tt "attribute-name") "."))) (section 4 "Metrics" (def (sig (procedure "(euclidean-distance instance-1 instance-2)" (id euclidean-distance))) (p "Computes the Euclidean distance between the two instances."))) (section 4 "Importing Data" (def (sig (procedure "(read-arff filename)" (id read-arff))) (p "Reads an ARFF definition from the given filename and returns a relation.  
Currently supports only nominal and numeric attribute types; sparse ARFF files are not supported.")))) (section 3 "Author" (p (int-link "/users/peter-lane" "Peter Lane") ".")) (section 3 "License" (p "GPL version 3.0.")) (section 3 "Version History" (p "in trunk."))))