Youtubeでビデオ・レッスン（2/16）機械学習入門 / k最近傍法 | 機械学習の手順と基本的なアルゴリズム

Jupyter Notebookなどで、コードを実装して実際に確かめてみましょう。

Jetson Nanoでjupyter-notebookを使う場合
mglearnは入ってないのでインストールしておきます
$ sudo pip3 install mglearn

#データを準備
import mglearn
import matplotlib.pyplot as plt
%matplitlib inline
mglearn.plots.plot_knn_classification(n_neighbors=3)
x,y = mglearn.datasets.make_forge()

#データを目視
x
x.shape
y

#散布図を描いてみる
mglearn.discrete_scatter(x[:,0],x[:,1],y)
plt.show()

# Bring in the k-nearest-neighbors classifier and the train/test splitter
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier

# Split the data into a training set and a test set (fixed random_state)
x_train, x_test, y_train, y_test = train_test_split(x, y, random_state=0)

# Inspect the split
x_train
x_train.shape
x_test.shape

# Set up a nearest-neighbors classifier that looks at 3 neighbors
clf = KNeighborsClassifier(n_neighbors=3)
# Train on the training data
clf.fit(x_train, y_train)
# Predict labels for the test data
clf.predict(x_test)
# Compare with the actual labels
y_test
# score() gives the proportion of correct predictions
clf.score(x_test, y_test)
# Round the score so it is easier to read
round(clf.score(x_test, y_test), 3)
# Print the score with two decimal places using string formatting
# (this is a format specification, not a regular expression)
print(f"{clf.score(x_test, y_test):.2f}")

# Try different values of k and print the test accuracy for each one
for n_neighbors in range(1, 16):
    # Keep the freshly fitted model in clf. Without this assignment the new
    # classifier is thrown away and score() below keeps evaluating the
    # earlier n_neighbors=3 model, printing the same accuracy every time.
    clf = KNeighborsClassifier(n_neighbors=n_neighbors).fit(x_train, y_train)
    print("Test set accuracy : n_neighbors={},{:.2f}".format(n_neighbors, clf.score(x_test, y_test)))

# Visualize how the decision boundary changes as k changes.
# plt.show() may take a little while to render.
neighbor_counts = [1, 3, 5, 10, 15]
fig, axes = plt.subplots(1, 5, figsize=(15, 3))
for k, panel in zip(neighbor_counts, axes):
    # Fit a classifier for this k on the training data
    clf = KNeighborsClassifier(n_neighbors=k)
    clf.fit(x_train, y_train)
    # Shade the classifier's regions, then overlay all data points
    mglearn.plots.plot_2d_separator(clf, x, fill=True, ax=panel, alpha=0.5)
    mglearn.discrete_scatter(x[:, 0], x[:, 1], y, ax=panel)
    panel.set_title(f"{k} neighbors")
plt.show()

#実践的なデータを使ってやってみます。
#sklearnが持っているデータ
#ビデオではcancerというデータを使っています。
#基本フローは一緒なので、自分の手でやってみましょう(^^)

#データを準備

import mglearn

import matplotlib.pyplot as plt

%matplitlib inline

mglearn.plots.plot_knn_classification(n_neighbors=3)

x,y = mglearn.datasets.make_forge()

#データを目視

x.shape

#散布図を描いてみる

mglearn.discrete_scatter(x[:,0],x[:,1],y)

plt.show()

# Bring in the k-nearest-neighbors classifier and the train/test splitter
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier

# Split the data into a training set and a test set (fixed random_state)
x_train, x_test, y_train, y_test = train_test_split(x, y, random_state=0)

# Inspect the split
x_train
x_train.shape
x_test.shape

# Set up a nearest-neighbors classifier that looks at 3 neighbors
clf = KNeighborsClassifier(n_neighbors=3)
# Train on the training data
clf.fit(x_train, y_train)
# Predict labels for the test data
clf.predict(x_test)
# Compare with the actual labels
y_test
# score() gives the proportion of correct predictions
clf.score(x_test, y_test)
# Round the score so it is easier to read
round(clf.score(x_test, y_test), 3)
# Print the score with two decimal places using string formatting
# (this is a format specification, not a regular expression)
print(f"{clf.score(x_test, y_test):.2f}")

# Try different values of k and print the test accuracy for each one
for n_neighbors in range(1, 16):
    # The loop body must be indented to be valid Python.
    # Keep the freshly fitted model in clf. Without this assignment the new
    # classifier is thrown away and score() below keeps evaluating the
    # earlier n_neighbors=3 model, printing the same accuracy every time.
    clf = KNeighborsClassifier(n_neighbors=n_neighbors).fit(x_train, y_train)
    print("Test set accuracy : n_neighbors={},{:.2f}".format(n_neighbors, clf.score(x_test, y_test)))

# Visualize how the decision boundary changes as k changes.
# plt.show() may take a little while to render.
fig, axes = plt.subplots(1, 5, figsize=(15, 3))
# One subplot per k; the loop body must be indented to be valid Python
for n_neighbors, ax in zip([1, 3, 5, 10, 15], axes):
    # Fit a classifier for this k on the training data
    clf = KNeighborsClassifier(n_neighbors=n_neighbors).fit(x_train, y_train)
    # Shade the classifier's regions, then overlay all data points
    mglearn.plots.plot_2d_separator(clf, x, fill=True, ax=ax, alpha=0.5)
    mglearn.discrete_scatter(x[:, 0], x[:, 1], y, ax=ax)
    ax.set_title("{} neighbors".format(n_neighbors))
# Show the whole figure once, after every subplot has been drawn
plt.show()

#実践的なデータを使ってやってみます。

#sklearnが持っているデータ

#ビデオではcancerというデータを使っています。

#基本フローは一緒なので、自分の手でやってみましょう(^^)

FRONT

地図と画像のサイト

Youtubeでビデオ・レッスン（2/16）機械学習入門 / k最近傍法 | 機械学習の手順と基本的なアルゴリズム

Be the first to comment

Leave a Reply コメントをキャンセル