|
144 | 144 | <span class="kn">from</span> <span class="nn">__future__</span> <span class="kn">import</span> <span class="n">print_function</span>
|
145 | 145 | <span class="kn">from</span> <span class="nn">time</span> <span class="kn">import</span> <span class="n">time</span>
|
146 | 146 |
|
147 |
| -<span class="kn">from</span> <span class="nn">sklearn.feature_extraction.text</span> <span class="kn">import</span> <a href="../../modules/generated/sklearn.feature_extraction.text.TfidfVectorizer.html#sklearn.feature_extraction.text.TfidfVectorizer"><a href="../../modules/generated/sklearn.feature_extraction.text.TfidfVectorizer.html#sklearn.feature_extraction.text.TfidfVectorizer"><a href="../../modules/generated/sklearn.feature_extraction.text.TfidfVectorizer.html#sklearn.feature_extraction.text.TfidfVectorizer"><a href="../../modules/generated/sklearn.feature_extraction.text.TfidfVectorizer.html#sklearn.feature_extraction.text.TfidfVectorizer"><span class="n">TfidfVectorizer</span></a></a></a></a> |
148 |
| -<span class="kn">from</span> <span class="nn">sklearn.decomposition</span> <span class="kn">import</span> <a href="../../modules/generated/sklearn.decomposition.NMF.html#sklearn.decomposition.NMF"><a href="../../modules/generated/sklearn.decomposition.NMF.html#sklearn.decomposition.NMF"><a href="../../modules/generated/sklearn.decomposition.NMF.html#sklearn.decomposition.NMF"><a href="../../modules/generated/sklearn.decomposition.NMF.html#sklearn.decomposition.NMF"><span class="n">NMF</span></a></a></a></a> |
| 147 | +<span class="kn">from</span> <span class="nn">sklearn.feature_extraction.text</span> <span class="kn">import</span> <a href="../../modules/generated/sklearn.feature_extraction.text.TfidfVectorizer.html#sklearn.feature_extraction.text.TfidfVectorizer"><a href="../../modules/generated/sklearn.feature_extraction.text.TfidfVectorizer.html#sklearn.feature_extraction.text.TfidfVectorizer"><a href="../../modules/generated/sklearn.feature_extraction.text.TfidfVectorizer.html#sklearn.feature_extraction.text.TfidfVectorizer"><a href="../../modules/generated/sklearn.feature_extraction.text.TfidfVectorizer.html#sklearn.feature_extraction.text.TfidfVectorizer"><a href="../../modules/generated/sklearn.feature_extraction.text.TfidfVectorizer.html#sklearn.feature_extraction.text.TfidfVectorizer"><span class="n">TfidfVectorizer</span></a></a></a></a></a> |
| 148 | +<span class="kn">from</span> <span class="nn">sklearn.decomposition</span> <span class="kn">import</span> <a href="../../modules/generated/sklearn.decomposition.NMF.html#sklearn.decomposition.NMF"><a href="../../modules/generated/sklearn.decomposition.NMF.html#sklearn.decomposition.NMF"><a href="../../modules/generated/sklearn.decomposition.NMF.html#sklearn.decomposition.NMF"><a href="../../modules/generated/sklearn.decomposition.NMF.html#sklearn.decomposition.NMF"><a href="../../modules/generated/sklearn.decomposition.NMF.html#sklearn.decomposition.NMF"><span class="n">NMF</span></a></a></a></a></a> |
149 | 149 | <span class="kn">from</span> <span class="nn">sklearn.datasets</span> <span class="kn">import</span> <span class="n">fetch_20newsgroups</span>
|
150 | 150 |
|
151 | 151 | <span class="n">n_samples</span> <span class="o">=</span> <span class="mi">2000</span>
|
|
163 | 163 | <span class="n">dataset</span> <span class="o">=</span> <span class="n">fetch_20newsgroups</span><span class="p">(</span><span class="n">shuffle</span><span class="o">=</span><span class="bp">True</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span>
|
164 | 164 | <span class="n">remove</span><span class="o">=</span><span class="p">(</span><span class="s">'headers'</span><span class="p">,</span> <span class="s">'footers'</span><span class="p">,</span> <span class="s">'quotes'</span><span class="p">))</span>
|
165 | 165 |
|
166 |
| -<span class="n">vectorizer</span> <span class="o">=</span> <a href="../../modules/generated/sklearn.feature_extraction.text.TfidfVectorizer.html#sklearn.feature_extraction.text.TfidfVectorizer"><a href="../../modules/generated/sklearn.feature_extraction.text.TfidfVectorizer.html#sklearn.feature_extraction.text.TfidfVectorizer"><a href="../../modules/generated/sklearn.feature_extraction.text.TfidfVectorizer.html#sklearn.feature_extraction.text.TfidfVectorizer"><a href="../../modules/generated/sklearn.feature_extraction.text.TfidfVectorizer.html#sklearn.feature_extraction.text.TfidfVectorizer"><span class="n">TfidfVectorizer</span></a></a></a></a><span class="p">(</span><span class="n">max_df</span><span class="o">=</span><span class="mf">0.95</span><span class="p">,</span> <span class="n">min_df</span><span class="o">=</span><span class="mi">2</span><span class="p">,</span> <span class="n">max_features</span><span class="o">=</span><span class="n">n_features</span><span class="p">,</span> |
| 166 | +<span class="n">vectorizer</span> <span class="o">=</span> <a href="../../modules/generated/sklearn.feature_extraction.text.TfidfVectorizer.html#sklearn.feature_extraction.text.TfidfVectorizer"><a href="../../modules/generated/sklearn.feature_extraction.text.TfidfVectorizer.html#sklearn.feature_extraction.text.TfidfVectorizer"><a href="../../modules/generated/sklearn.feature_extraction.text.TfidfVectorizer.html#sklearn.feature_extraction.text.TfidfVectorizer"><a href="../../modules/generated/sklearn.feature_extraction.text.TfidfVectorizer.html#sklearn.feature_extraction.text.TfidfVectorizer"><a href="../../modules/generated/sklearn.feature_extraction.text.TfidfVectorizer.html#sklearn.feature_extraction.text.TfidfVectorizer"><span class="n">TfidfVectorizer</span></a></a></a></a></a><span class="p">(</span><span class="n">max_df</span><span class="o">=</span><span class="mf">0.95</span><span class="p">,</span> <span class="n">min_df</span><span class="o">=</span><span class="mi">2</span><span class="p">,</span> <span class="n">max_features</span><span class="o">=</span><span class="n">n_features</span><span class="p">,</span> |
167 | 167 | <span class="n">stop_words</span><span class="o">=</span><span class="s">'english'</span><span class="p">)</span>
|
168 | 168 | <span class="n">tfidf</span> <span class="o">=</span> <span class="n">vectorizer</span><span class="o">.</span><span class="n">fit_transform</span><span class="p">(</span><span class="n">dataset</span><span class="o">.</span><span class="n">data</span><span class="p">[:</span><span class="n">n_samples</span><span class="p">])</span>
|
169 | 169 | <span class="k">print</span><span class="p">(</span><span class="s">"done in </span><span class="si">%0.3f</span><span class="s">s."</span> <span class="o">%</span> <span class="p">(</span><span class="n">time</span><span class="p">()</span> <span class="o">-</span> <span class="n">t0</span><span class="p">))</span>
|
170 | 170 |
|
171 | 171 | <span class="c"># Fit the NMF model</span>
|
172 | 172 | <span class="k">print</span><span class="p">(</span><span class="s">"Fitting the NMF model with n_samples=</span><span class="si">%d</span><span class="s"> and n_features=</span><span class="si">%d</span><span class="s">..."</span>
|
173 | 173 | <span class="o">%</span> <span class="p">(</span><span class="n">n_samples</span><span class="p">,</span> <span class="n">n_features</span><span class="p">))</span>
|
174 |
| -<span class="n">nmf</span> <span class="o">=</span> <a href="../../modules/generated/sklearn.decomposition.NMF.html#sklearn.decomposition.NMF"><a href="../../modules/generated/sklearn.decomposition.NMF.html#sklearn.decomposition.NMF"><a href="../../modules/generated/sklearn.decomposition.NMF.html#sklearn.decomposition.NMF"><a href="../../modules/generated/sklearn.decomposition.NMF.html#sklearn.decomposition.NMF"><span class="n">NMF</span></a></a></a></a><span class="p">(</span><span class="n">n_components</span><span class="o">=</span><span class="n">n_topics</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">tfidf</span><span class="p">)</span> |
| 174 | +<span class="n">nmf</span> <span class="o">=</span> <a href="../../modules/generated/sklearn.decomposition.NMF.html#sklearn.decomposition.NMF"><a href="../../modules/generated/sklearn.decomposition.NMF.html#sklearn.decomposition.NMF"><a href="../../modules/generated/sklearn.decomposition.NMF.html#sklearn.decomposition.NMF"><a href="../../modules/generated/sklearn.decomposition.NMF.html#sklearn.decomposition.NMF"><a href="../../modules/generated/sklearn.decomposition.NMF.html#sklearn.decomposition.NMF"><span class="n">NMF</span></a></a></a></a></a><span class="p">(</span><span class="n">n_components</span><span class="o">=</span><span class="n">n_topics</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">tfidf</span><span class="p">)</span> |
175 | 175 | <span class="k">print</span><span class="p">(</span><span class="s">"done in </span><span class="si">%0.3f</span><span class="s">s."</span> <span class="o">%</span> <span class="p">(</span><span class="n">time</span><span class="p">()</span> <span class="o">-</span> <span class="n">t0</span><span class="p">))</span>
|
176 | 176 |
|
177 | 177 | <span class="n">feature_names</span> <span class="o">=</span> <span class="n">vectorizer</span><span class="o">.</span><span class="n">get_feature_names</span><span class="p">()</span>
|
|
0 commit comments