@@ -126,6 +126,7 @@ def attr_num(self, attr):
126126 return attr
127127
def update_values(self):
    """Rebuild ``self.values`` from the current examples.

    The examples are transposed so that each attribute's column is seen as
    one sequence, and ``unique`` is applied to every column in turn. The
    result is stored as a list with one entry per attribute.
    """
    # zip(*rows) turns a sequence of example rows into per-attribute columns.
    columns = zip(*self.examples)
    self.values = [unique(column) for column in columns]
130131
131132 def sanitize (self , example ):
@@ -312,6 +313,8 @@ def leave_one_out(learner, dataset, size=None):
312313
313314
314315def learning_curve (learner , dataset , trials = 10 , sizes = None ):
316+ """Return a list of (training-set size, mean accuracy) pairs, obtained by
317+ repeatedly cross-validating the learner on training sets of each given size."""
315318 if sizes is None :
316319 sizes = list (range (2 , len (dataset .examples ) - trials , 2 ))
317320
@@ -363,6 +366,7 @@ def add(self, val, subtree):
363366 self .branches [val ] = subtree
364367
365368 def display (self , indent = 0 ):
369+ """Print this subtree, showing the tested attribute and each branch, indented by ``indent``."""
366370 name = self .attr_name
367371 print ('Test' , name )
368372 for (val , subtree ) in self .branches .items ():
@@ -383,6 +387,7 @@ def __call__(self, example):
383387 return self .result
384388
def display(self, indent=0):
    """Print the result stored at this leaf.

    ``indent`` is accepted only so that a leaf can be displayed through the
    same call signature as ``DecisionFork.display``; a leaf prints on a
    single line and does not use the value.
    """
    print('RESULT =', self.result)
387392
388393 def __repr__ (self ):
@@ -397,6 +402,9 @@ def __init__(self, dataset):
397402 self .tree = self .decision_tree_learning (dataset .examples , dataset .inputs )
398403
399404 def decision_tree_learning (self , examples , attrs , parent_examples = ()):
405+ """Recursively build a decision tree: pick the most informative attribute, branch on
406+ its values, and recurse, returning a leaf when examples are empty, all of one class,
407+ or no attributes remain. [Figure 18.5]"""
400408 if len (examples ) == 0 :
401409 return self .plurality_value (parent_examples )
402410 if self .all_same_class (examples ):
@@ -449,6 +457,7 @@ def split_by(self, attr, examples):
449457 return [(v , [e for e in examples if e [attr ] == v ]) for v in self .dataset .values [attr ]]
450458
def predict(self, x):
    """Return the learned tree's output for example ``x``.

    The tree stored in ``self.tree`` is itself callable, so prediction is a
    single call on it.
    """
    classify = self.tree
    return classify(x)
453462
454463
@@ -497,6 +506,9 @@ def find_examples(self, examples):
497506 return None , None , None
498507
499508 def decision_list_learning (self , examples ):
509+ """Recursively build the decision list: find a test selecting a non-empty subset of
510+ examples sharing one outcome, append (test, outcome), and recurse on the rest;
511+ raise ValueError if no separating test exists. [Figure 18.11]"""
500512 if not examples :
501513 return [((), None )] # catch-all: the empty test matches any example
502514 test , outcome , matched = self .find_examples (examples )
@@ -526,6 +538,8 @@ def predict(self, example):
526538
527539
528540class SVC :
541+ """Support Vector Classifier trained in dual form by solving a quadratic
542+ programming problem; supports arbitrary kernels and a soft-margin penalty ``C``."""
529543
530544 def __init__ (self , kernel = linear_kernel , C = 1.0 , verbose = False ):
531545 self .kernel = kernel
@@ -591,6 +605,8 @@ def predict(self, X):
591605
592606
593607class SVR :
608+ """Support Vector Regressor trained in dual form by solving a quadratic
609+ programming problem, using an epsilon-insensitive loss and penalty ``C``."""
594610
595611 def __init__ (self , kernel = linear_kernel , C = 1.0 , epsilon = 0.1 , verbose = False ):
596612 self .kernel = kernel
@@ -649,12 +665,15 @@ def solve_qp(self, X, y):
649665 self .alphas_n = alphas [m :]
650666
def predict(self, X):
    """Return the regression output for the samples ``X``.

    With any kernel other than ``linear_kernel`` the output is the dot
    product of ``alphas_p - alphas_n`` with the kernel evaluated between the
    stored ``self.sv`` and ``X``, plus the bias ``self.b``. With the linear
    kernel the stored weight vector is used directly: ``X . w + b``.
    """
    uses_custom_kernel = self.kernel != linear_kernel
    if uses_custom_kernel:
        coefficients = self.alphas_p - self.alphas_n
        kernel_values = self.kernel(self.sv, X)
        return np.dot(coefficients, kernel_values) + self.b
    # Linear case: no kernel evaluation is needed, only the weights.
    return np.dot(X, self.w) + self.b
655672
656673
657674class MultiClassLearner :
675+ """Wrap a binary classifier ``clf`` to handle multiple classes, using either
676+ the one-vs-rest ('ovr') or one-vs-one ('ovo') decision function."""
658677
659678 def __init__ (self , clf , decision_function = 'ovr' ):
660679 self .clf = clf
@@ -811,9 +830,11 @@ def __init__(self, learners):
811830 self .learners = learners
812831
def train(self, dataset):
    """Fit every learner in ``self.learners`` on ``dataset``.

    Each learner is called with the dataset, and the objects it returns are
    collected, in order, into ``self.predictors``.
    """
    trained = []
    for make_predictor in self.learners:
        trained.append(make_predictor(dataset))
    self.predictors = trained
815835
def predict(self, example):
    """Return the most common prediction among the trained predictors.

    Every predictor in ``self.predictors`` is asked to classify ``example``
    and the votes are reduced with ``mode``.
    """
    votes = (voter.predict(example) for voter in self.predictors)
    return mode(votes)
818839
819840
@@ -847,6 +868,7 @@ def __init__(self, predictors, weights):
847868 self .weights = weights
848869
def predict(self, example):
    """Return the weighted-vote prediction for ``example``.

    Each predictor in ``self.predictors`` classifies ``example``; the votes
    are combined with ``self.weights`` by ``weighted_mode``.
    """
    votes = (voter.predict(example) for voter in self.predictors)
    return weighted_mode(votes, self.weights)
851873
852874
@@ -883,6 +905,7 @@ def feature_bagging(self, p=0.7):
883905 return inputs or self .dataset .inputs
884906
def predict(self, example):
    """Return the most common prediction among ``self.predictors``.

    Each stored predictor classifies ``example`` and ``mode`` picks the
    winning vote.
    """
    ballots = (tree.predict(example) for tree in self.predictors)
    return mode(ballots)
887910
888911
@@ -930,11 +953,13 @@ def weighted_replicate(seq, weights, n):
930953# metrics
931954
def accuracy_score(y_pred, y_true):
    """Return the mean of the element-wise comparison ``y_pred == y_true``.

    Both arguments must expose a ``shape`` attribute, and the shapes must be
    equal; otherwise the assertion fails.
    """
    assert y_pred.shape == y_true.shape
    hits = y_pred == y_true
    return np.mean(hits)
935959
936960
def r2_score(y_pred, y_true):
    """Return the coefficient of determination of ``y_pred`` against ``y_true``.

    Computed as one minus the ratio of the residual sum of squares to the
    total sum of squares of ``y_true`` about its mean. Both arguments must
    have equal ``shape``; otherwise the assertion fails.
    """
    assert y_pred.shape == y_true.shape
    residual_ss = np.sum(np.square(y_pred - y_true))
    total_ss = np.sum(np.square(y_true - np.mean(y_true)))
    return 1. - (residual_ss / total_ss)
@@ -964,6 +989,8 @@ def RestaurantDataSet(examples=None):
964989
965990
def T(attr_name, branches):
    """Shorthand for writing a decision-tree fork by hand.

    Every child in ``branches`` that is not already a ``DecisionFork`` is
    wrapped in a ``DecisionLeaf``. The returned fork tests the attribute
    whose index ``restaurant.attr_num(attr_name)`` yields.
    """
    wrapped = {}
    for value, child in branches.items():
        if not isinstance(child, DecisionFork):
            child = DecisionLeaf(child)
        wrapped[value] = child
    # NOTE(review): ``print`` is DecisionFork's third positional argument --
    # presumably its default-child handler; confirm against DecisionFork.
    return DecisionFork(restaurant.attr_num(attr_name), attr_name, print, wrapped)
0 commit comments