"""
=========================================
Understanding the decision tree structure
=========================================

The decision tree structure can be analysed to gain further insight into the
relation between the features and the target to predict. In this example, we
show how to retrieve:

- the binary tree structure;
- the depth of each node and whether or not it is a leaf;
- the nodes that were reached by a sample using the ``decision_path`` method;
- the leaf that was reached by a sample using the ``apply`` method;
- the rules that were used to predict a sample;
- the decision path shared by a group of samples.

"""
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.datasets import load_iris
from sklearn.tree import DecisionTreeClassifier

iris = load_iris()
X = iris.data
y = iris.target
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

estimator = DecisionTreeClassifier(max_leaf_nodes=3, random_state=0)
estimator.fit(X_train, y_train)

# The decision estimator has an attribute called tree_ which stores the entire
# tree structure and allows access to low-level attributes. The binary tree
# tree_ is represented as a number of parallel arrays. The i-th element of
# each array holds information about the node `i`. Node 0 is the tree's root.
# NOTE: some of the arrays only apply to either leaves or split nodes; in that
# case the values for nodes of the other type are arbitrary!
#
# Among those arrays, we have:
# - left_child, id of the left child of the node
# - right_child, id of the right child of the node
# - feature, index of the feature used for splitting the node
# - threshold, threshold value at the node
#

# Using those arrays, we can parse the tree structure:

n_nodes = estimator.tree_.node_count
children_left = estimator.tree_.children_left
children_right = estimator.tree_.children_right
feature = estimator.tree_.feature
threshold = estimator.tree_.threshold

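# As a quick illustration (not part of the original walk-through), leaves can
# be spotted directly in these arrays: scikit-learn stores -1 (its internal
# TREE_LEAF sentinel) as the child ids of a leaf, and the corresponding
# ``feature`` / ``threshold`` entries are placeholders that should not be read.
leaf_mask = children_left == -1
print("leaf node ids:", np.where(leaf_mask)[0])
print("placeholder 'feature' values at leaves:", feature[leaf_mask])
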
# The tree structure can be traversed to compute various properties such
# as the depth of each node and whether or not it is a leaf.
node_depth = np.zeros(shape=n_nodes, dtype=np.int64)
is_leaves = np.zeros(shape=n_nodes, dtype=bool)
stack = [(0, -1)]  # seed is the root node id and its parent depth
while len(stack) > 0:
    node_id, parent_depth = stack.pop()
    node_depth[node_id] = parent_depth + 1

    # If we have a test node (split node), push both children on the stack
    if children_left[node_id] != children_right[node_id]:
        stack.append((children_left[node_id], parent_depth + 1))
        stack.append((children_right[node_id], parent_depth + 1))
    else:
        is_leaves[node_id] = True

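# A quick consistency check (a sketch, assuming ``tree_.max_depth`` reports
# the depth of the deepest node): the manual traversal above should agree
# with the depth stored on the fitted tree.
print("computed max depth: %d, tree_.max_depth: %d"
      % (node_depth.max(), estimator.tree_.max_depth))
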
print("The binary tree structure has %s nodes and has "
      "the following tree structure:"
      % n_nodes)
for i in range(n_nodes):
    if is_leaves[i]:
        print("%snode=%s leaf node." % (node_depth[i] * "\t", i))
    else:
        print("%snode=%s test node: go to node %s if X[:, %s] <= %s else to "
              "node %s."
              % (node_depth[i] * "\t",
                 i,
                 children_left[i],
                 feature[i],
                 threshold[i],
                 children_right[i],
                 ))
print()
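
# As an optional cross-check (a sketch, not part of the original example), the
# same structure can be exported in Graphviz "dot" format with
# ``sklearn.tree.export_graphviz``; passing ``out_file=None`` to get the dot
# source back as a string assumes a reasonably recent scikit-learn version.
from sklearn.tree import export_graphviz

dot_source = export_graphviz(estimator, out_file=None)
print(dot_source.splitlines()[0])  # e.g. the "digraph Tree {" header line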

# First let's retrieve the decision path of each sample. The decision_path
# method returns an indicator matrix: a non-zero element at position (i, j)
# indicates that sample i goes through node j.

node_indicator = estimator.decision_path(X_test)

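# A small illustration (a sketch, assuming the indicator is returned as a
# scipy sparse matrix, which is how current scikit-learn versions implement
# it): its shape is (n_samples, n_nodes), and the non-zero columns of a row
# are exactly the nodes visited by that sample.
print("indicator matrix shape (n_samples, n_nodes):", node_indicator.shape)
print("nodes visited by sample 0:", node_indicator[0].nonzero()[1])
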
# Similarly, we can also have the leaf ids reached by each sample.

leave_id = estimator.apply(X_test)

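# As an extra illustration (a sketch, not in the original walk-through), the
# leaf ids can be aggregated to see how the test samples spread over the
# leaves of the fitted tree:
for leaf in np.unique(leave_id):
    print("leaf node %d receives %d test samples"
          % (leaf, np.sum(leave_id == leaf)))
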
# Now, it's possible to get the tests that were used to predict a sample or
# a group of samples. First, let's do it for a single sample.

sample_id = 0
node_index = node_indicator.indices[node_indicator.indptr[sample_id]:
                                    node_indicator.indptr[sample_id + 1]]
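# ``indices`` and ``indptr`` are the raw CSR buffers of the sparse indicator
# matrix: the slice above selects the column (node) ids of the non-zero
# entries in row ``sample_id``, i.e. the nodes traversed by that sample.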

print('Rules used to predict sample %s: ' % sample_id)
for node_id in node_index:
    # skip the leaf node that terminates the path: it carries no test
    if leave_id[sample_id] == node_id:
        continue

    if X_test[sample_id, feature[node_id]] <= threshold[node_id]:
        threshold_sign = "<="
    else:
        threshold_sign = ">"

    print("decision id node %s : (X_test[%s, %s] (= %s) %s %s)"
          % (node_id,
             sample_id,
             feature[node_id],
             X_test[sample_id, feature[node_id]],
             threshold_sign,
             threshold[node_id]))

# For a group of samples, we can retrieve the nodes that the samples have in
# common within the tree.
sample_ids = [0, 1]
common_nodes = (node_indicator.toarray()[sample_ids].sum(axis=0) ==
                len(sample_ids))

common_node_id = np.arange(n_nodes)[common_nodes]
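
# An equivalent, sparse-friendly variant (a sketch; it avoids densifying the
# indicator matrix with ``toarray()``, which may matter for large trees or
# many samples):
common_nodes_sparse = np.asarray(
    node_indicator[sample_ids].sum(axis=0) == len(sample_ids)).ravel()
assert np.array_equal(common_nodes, common_nodes_sparse)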

print("\nThe following samples %s share the node(s) %s in the tree"
      % (sample_ids, common_node_id))
print("It is %s %% of all nodes." % (100 * len(common_node_id) / n_nodes,))
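
# A possible follow-up (a sketch): list which features are tested at the
# shared split nodes of the common path.
for node_id in common_node_id:
    if children_left[node_id] != children_right[node_id]:  # split node
        print("shared split node %d tests feature %d against threshold %.3f"
              % (node_id, feature[node_id], threshold[node_id]))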