Skip to content

Commit 70f99f5

Browse files
committed
Add docstrings to undocumented public API (logic, utils, learning) (#666)
Document previously undocumented module-level functions, classes and public methods in logic.py, logic4e.py, utils.py, utils4e.py, learning.py and learning4e.py (187 docstrings in total), covering the SAT/CDCL helpers, the wumpus Expr constructors, FOL backward chaining, distance/loss/activation helpers, learner display/predict methods, etc. Descriptions are derived from the code; AIMA figure references are included only where confident. No behaviour changed; docs build with 0 warnings.
1 parent 81c0c27 commit 70f99f5

6 files changed

Lines changed: 263 additions & 0 deletions

File tree

learning.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,7 @@ def attr_num(self, attr):
125125
return attr
126126

127127
def update_values(self):
128+
"""Recompute ``self.values`` (the list of distinct values per attribute) from the examples."""
128129
self.values = list(map(unique, zip(*self.examples)))
129130

130131
def sanitize(self, example):
@@ -316,6 +317,8 @@ def leave_one_out(learner, dataset, size=None):
316317

317318

318319
def learning_curve(learner, dataset, trials=10, sizes=None):
320+
"""Return a list of (training-set size, mean accuracy) pairs, obtained by
321+
repeatedly cross-validating the learner on training sets of each given size."""
319322
if sizes is None:
320323
sizes = list(range(2, len(dataset.examples) - trials, 2))
321324

@@ -367,6 +370,7 @@ def add(self, val, subtree):
367370
self.branches[val] = subtree
368371

369372
def display(self, indent=0):
373+
"""Print this subtree, showing the tested attribute and each branch, indented by ``indent``."""
370374
name = self.attr_name
371375
print('Test', name)
372376
for (val, subtree) in self.branches.items():
@@ -387,6 +391,7 @@ def __call__(self, example):
387391
return self.result
388392

389393
def display(self):
394+
"""Print the result stored at this leaf."""
390395
print('RESULT =', self.result)
391396

392397
def __repr__(self):
@@ -806,6 +811,12 @@ def network(input_units, hidden_layer_sizes, output_units, activation=sigmoid):
806811

807812

808813
def init_examples(examples, idx_i, idx_t, o_units):
814+
"""Split examples into input and target dicts keyed by example index.
815+
816+
Inputs are read from the attribute positions in ``idx_i`` and targets from
817+
position ``idx_t``. When ``o_units`` > 1 each target is one-hot encoded over
818+
``o_units`` units, otherwise it is wrapped in a single-element list. Returns
819+
the pair (inputs, targets)."""
809820
inputs, targets = {}, {}
810821

811822
for i, e in enumerate(examples):
@@ -825,10 +836,13 @@ def init_examples(examples, idx_i, idx_t, o_units):
825836

826837

827838
def find_max_node(nodes):
839+
"""Return the index of the node with the greatest ``value`` attribute."""
828840
return nodes.index(max(nodes, key=lambda node: node.value))
829841

830842

831843
class SVC:
844+
"""Support Vector Classifier trained in dual form by solving a quadratic
845+
programming problem; supports arbitrary kernels and a soft-margin penalty ``C``."""
832846

833847
def __init__(self, kernel=linear_kernel, C=1.0, verbose=False):
834848
self.kernel = kernel
@@ -894,6 +908,8 @@ def predict(self, X):
894908

895909

896910
class SVR:
911+
"""Support Vector Regressor trained in dual form by solving a quadratic
912+
programming problem, using an epsilon-insensitive loss and penalty ``C``."""
897913

898914
def __init__(self, kernel=linear_kernel, C=1.0, epsilon=0.1, verbose=False):
899915
self.kernel = kernel
@@ -953,12 +969,15 @@ def solve_qp(self, X, y):
953969
self.alphas_n = alphas[m:]
954970

955971
def predict(self, X):
972+
"""Predict the regression target value(s) for the samples ``X``."""
956973
if self.kernel != linear_kernel:
957974
return np.dot(self.alphas_p - self.alphas_n, self.kernel(self.sv, X)) + self.b
958975
return np.dot(X, self.w) + self.b
959976

960977

961978
class MultiClassLearner:
979+
"""Wrap a binary classifier ``clf`` to handle multiple classes, using either
980+
the one-vs-rest ('ovr') or one-vs-one ('ovo') decision function."""
962981

963982
def __init__(self, clf, decision_function='ovr'):
964983
self.clf = clf
@@ -1144,11 +1163,13 @@ def weighted_replicate(seq, weights, n):
11441163
# metrics
11451164

11461165
def accuracy_score(y_pred, y_true):
1166+
"""Return the fraction of predictions in ``y_pred`` that match ``y_true``."""
11471167
assert y_pred.shape == y_true.shape
11481168
return np.mean(y_pred == y_true)
11491169

11501170

11511171
def r2_score(y_pred, y_true):
1172+
"""Return the R^2 (coefficient of determination) of ``y_pred`` against ``y_true``."""
11521173
assert y_pred.shape == y_true.shape
11531174
return 1. - (np.sum(np.square(y_pred - y_true)) / # sum of square of residuals
11541175
np.sum(np.square(y_true - np.mean(y_true)))) # total sum of squares
@@ -1178,6 +1199,8 @@ def RestaurantDataSet(examples=None):
11781199

11791200

11801201
def T(attr_name, branches):
1202+
"""Build a DecisionFork testing the restaurant attribute ``attr_name``, wrapping each
1203+
non-fork child in a DecisionLeaf; a shorthand for writing decision trees by hand."""
11811204
branches = {value: (child if isinstance(child, DecisionFork) else DecisionLeaf(child))
11821205
for value, child in branches.items()}
11831206
return DecisionFork(restaurant.attr_num(attr_name), attr_name, print, branches)

learning4e.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,7 @@ def attr_num(self, attr):
126126
return attr
127127

128128
def update_values(self):
129+
"""Recompute ``self.values`` (the list of distinct values per attribute) from the examples."""
129130
self.values = list(map(unique, zip(*self.examples)))
130131

131132
def sanitize(self, example):
@@ -312,6 +313,8 @@ def leave_one_out(learner, dataset, size=None):
312313

313314

314315
def learning_curve(learner, dataset, trials=10, sizes=None):
316+
"""Return a list of (training-set size, mean accuracy) pairs, obtained by
317+
repeatedly cross-validating the learner on training sets of each given size."""
315318
if sizes is None:
316319
sizes = list(range(2, len(dataset.examples) - trials, 2))
317320

@@ -363,6 +366,7 @@ def add(self, val, subtree):
363366
self.branches[val] = subtree
364367

365368
def display(self, indent=0):
369+
"""Print this subtree, showing the tested attribute and each branch, indented by ``indent``."""
366370
name = self.attr_name
367371
print('Test', name)
368372
for (val, subtree) in self.branches.items():
@@ -383,6 +387,7 @@ def __call__(self, example):
383387
return self.result
384388

385389
def display(self):
390+
"""Print the result stored at this leaf."""
386391
print('RESULT =', self.result)
387392

388393
def __repr__(self):
@@ -397,6 +402,9 @@ def __init__(self, dataset):
397402
self.tree = self.decision_tree_learning(dataset.examples, dataset.inputs)
398403

399404
def decision_tree_learning(self, examples, attrs, parent_examples=()):
405+
"""Recursively build a decision tree: pick the most informative attribute, branch on
406+
its values, and recurse, returning a leaf when there are no examples, when all examples
407+
share one class, or when no attributes remain. [Figure 18.5]"""
400408
if len(examples) == 0:
401409
return self.plurality_value(parent_examples)
402410
if self.all_same_class(examples):
@@ -449,6 +457,7 @@ def split_by(self, attr, examples):
449457
return [(v, [e for e in examples if e[attr] == v]) for v in self.dataset.values[attr]]
450458

451459
def predict(self, x):
460+
"""Classify example ``x`` by walking the learned decision tree."""
452461
return self.tree(x)
453462

454463

@@ -497,6 +506,9 @@ def find_examples(self, examples):
497506
return None, None, None
498507

499508
def decision_list_learning(self, examples):
509+
"""Recursively build the decision list: find a test selecting a non-empty subset of
510+
examples sharing one outcome, append (test, outcome), and recurse on the rest;
511+
raise ValueError if no separating test exists. [Figure 18.11]"""
500512
if not examples:
501513
return [((), None)] # catch-all: the empty test matches any example
502514
test, outcome, matched = self.find_examples(examples)
@@ -526,6 +538,8 @@ def predict(self, example):
526538

527539

528540
class SVC:
541+
"""Support Vector Classifier trained in dual form by solving a quadratic
542+
programming problem; supports arbitrary kernels and a soft-margin penalty ``C``."""
529543

530544
def __init__(self, kernel=linear_kernel, C=1.0, verbose=False):
531545
self.kernel = kernel
@@ -591,6 +605,8 @@ def predict(self, X):
591605

592606

593607
class SVR:
608+
"""Support Vector Regressor trained in dual form by solving a quadratic
609+
programming problem, using an epsilon-insensitive loss and penalty ``C``."""
594610

595611
def __init__(self, kernel=linear_kernel, C=1.0, epsilon=0.1, verbose=False):
596612
self.kernel = kernel
@@ -649,12 +665,15 @@ def solve_qp(self, X, y):
649665
self.alphas_n = alphas[m:]
650666

651667
def predict(self, X):
668+
"""Predict the regression target value(s) for the samples ``X``."""
652669
if self.kernel != linear_kernel:
653670
return np.dot(self.alphas_p - self.alphas_n, self.kernel(self.sv, X)) + self.b
654671
return np.dot(X, self.w) + self.b
655672

656673

657674
class MultiClassLearner:
675+
"""Wrap a binary classifier ``clf`` to handle multiple classes, using either
676+
the one-vs-rest ('ovr') or one-vs-one ('ovo') decision function."""
658677

659678
def __init__(self, clf, decision_function='ovr'):
660679
self.clf = clf
@@ -811,9 +830,11 @@ def __init__(self, learners):
811830
self.learners = learners
812831

813832
def train(self, dataset):
833+
"""Train each constituent learner on ``dataset`` and store the resulting predictors."""
814834
self.predictors = [learner(dataset) for learner in self.learners]
815835

816836
def predict(self, example):
837+
"""Classify ``example`` by majority vote of the trained predictors."""
817838
return mode(predictor.predict(example) for predictor in self.predictors)
818839

819840

@@ -847,6 +868,7 @@ def __init__(self, predictors, weights):
847868
self.weights = weights
848869

849870
def predict(self, example):
871+
"""Classify ``example`` by the weighted vote of the predictors."""
850872
return weighted_mode((predictor.predict(example) for predictor in self.predictors), self.weights)
851873

852874

@@ -883,6 +905,7 @@ def feature_bagging(self, p=0.7):
883905
return inputs or self.dataset.inputs
884906

885907
def predict(self, example):
908+
"""Classify ``example`` by majority vote of the forest's decision trees."""
886909
return mode(predictor.predict(example) for predictor in self.predictors)
887910

888911

@@ -930,11 +953,13 @@ def weighted_replicate(seq, weights, n):
930953
# metrics
931954

932955
def accuracy_score(y_pred, y_true):
956+
"""Return the fraction of predictions in ``y_pred`` that match ``y_true``."""
933957
assert y_pred.shape == y_true.shape
934958
return np.mean(y_pred == y_true)
935959

936960

937961
def r2_score(y_pred, y_true):
962+
"""Return the R^2 (coefficient of determination) of ``y_pred`` against ``y_true``."""
938963
assert y_pred.shape == y_true.shape
939964
return 1. - (np.sum(np.square(y_pred - y_true)) / # sum of square of residuals
940965
np.sum(np.square(y_true - np.mean(y_true)))) # total sum of squares
@@ -964,6 +989,8 @@ def RestaurantDataSet(examples=None):
964989

965990

966991
def T(attr_name, branches):
992+
"""Build a DecisionFork testing the restaurant attribute ``attr_name``, wrapping each
993+
non-fork child in a DecisionLeaf; a shorthand for writing decision trees by hand."""
967994
branches = {value: (child if isinstance(child, DecisionFork) else DecisionLeaf(child))
968995
for value, child in branches.items()}
969996
return DecisionFork(restaurant.attr_num(attr_name), attr_name, print, branches)

0 commit comments

Comments
 (0)