# UTS
Lakukan analisis klasifikasi data dengan sumber data datasets/breast-cancer menggunakan Naive Bayes dan KNN untuk data tersebut.
Jelaskan proses tersebut dengan menulisnya di blog GitHub yang telah Anda miliki (Jupyter Book).
Unggah alamat blog GitHub Anda di Schoology.
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
# Load the breast-cancer dataset directly from its public CSV and preview
# the first five rows (DataFrame.head() defaults to n=5).
url = "https://raw.githubusercontent.com/datasets/breast-cancer/master/data/breast-cancer.csv"
df = pd.read_csv(url)
print(df.head())

# One-hot encode every categorical feature so the classifiers receive
# numeric input. The column names are spelled exactly as they appear in the
# CSV header (e.g. 'mefalsepause', 'inv-falsedes'); do not "correct" them or
# the lookup will fail.
df_encoded = pd.get_dummies(
    df,
    columns=[
        'age',
        'mefalsepause',
        'tumor-size',
        'inv-falsedes',
        'falsede-caps',
        'deg-malig',
        'breast',
        'breast-quad',
        'irradiat',
    ],
)

# Separate the target label from the feature matrix: 'class' is the label,
# every remaining (encoded) column is a feature.
y = df_encoded['class']
X = df_encoded.drop(columns='class')
age mefalsepause tumor-size inv-falsedes falsede-caps deg-malig breast \
0 40-49 premefalse 15-19 0-2 True 3 right
1 50-59 ge40 15-19 0-2 False 1 right
2 50-59 ge40 35-39 0-2 False 2 left
3 40-49 premefalse 35-39 0-2 True 3 right
4 40-49 premefalse 30-34 3-5 True 2 left
breast-quad irradiat class
0 left_up False recurrence-events
1 central False false-recurrence-events
2 left_low False recurrence-events
3 left_low True false-recurrence-events
4 right_up False recurrence-events
# Split the data into a training set and a test set: 20% of the rows are held
# out for testing, and random_state=42 fixes the shuffle so the split is
# reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# Create the Naive Bayes classifier object.
# NOTE(review): GaussianNB models each feature as a continuous Gaussian, while
# X is built from one-hot (0/1) columns; BernoulliNB or CategoricalNB may fit
# this data better -- confirm before changing, since the recorded results
# below depend on this choice.
naive_bayes = GaussianNB()
# Train the model on the training set. As the last expression in the cell,
# the returned (fitted) estimator is what the notebook echoes.
naive_bayes.fit(X_train, y_train)
GaussianNB()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
GaussianNB()
# Predict the class label of every row in the held-out test set with the
# trained Naive Bayes model, then print the raw array of predicted labels.
y_pred = naive_bayes.predict(X_test)
print('prediksi :')
print(y_pred)
prediksi :
['recurrence-events' 'recurrence-events' 'recurrence-events'
'recurrence-events' 'recurrence-events' 'recurrence-events'
'false-recurrence-events' 'recurrence-events' 'recurrence-events'
'recurrence-events' 'recurrence-events' 'recurrence-events'
'recurrence-events' 'recurrence-events' 'recurrence-events'
'recurrence-events' 'recurrence-events' 'recurrence-events'
'recurrence-events' 'recurrence-events' 'false-recurrence-events'
'recurrence-events' 'recurrence-events' 'recurrence-events'
'recurrence-events' 'recurrence-events' 'false-recurrence-events'
'recurrence-events' 'recurrence-events' 'recurrence-events'
'recurrence-events' 'false-recurrence-events' 'recurrence-events'
'recurrence-events' 'recurrence-events' 'recurrence-events'
'recurrence-events' 'recurrence-events' 'false-recurrence-events'
'recurrence-events' 'false-recurrence-events' 'recurrence-events'
'recurrence-events' 'false-recurrence-events' 'recurrence-events'
'recurrence-events' 'recurrence-events' 'recurrence-events'
'false-recurrence-events' 'recurrence-events' 'recurrence-events'
'recurrence-events' 'recurrence-events' 'recurrence-events'
'recurrence-events']
# Compute the prediction accuracy: the fraction of test rows whose predicted
# label equals the true label.
accuracy = accuracy_score(y_test, y_pred)
print("Akurasi Naive Bayes:", accuracy)
# Confusion matrix: rows are the true classes and columns the predicted
# classes, with labels in sorted order (no explicit `labels=` is passed).
print(confusion_matrix(y_test, y_pred))
Akurasi Naive Bayes: 0.32727272727272727
[[ 6 35]
[ 2 12]]
# k-nearest-neighbours classifier that votes among the 5 closest training
# samples.
knn = KNeighborsClassifier(n_neighbors=5)
# Train the model on the training set (same split as the Naive Bayes model).
knn.fit(X_train, y_train)
# Predict the labels of the test set.
# NOTE: this reassigns `y_pred`, replacing the Naive Bayes predictions.
y_pred = knn.predict(X_test)
print(y_pred)
# Compute the prediction accuracy.
# NOTE: this reassigns `accuracy`, replacing the Naive Bayes score.
accuracy = accuracy_score(y_test, y_pred)
print("Akurasi KNN:", accuracy)
# Confusion matrix: rows are the true classes and columns the predicted
# classes, with labels in sorted order.
print(confusion_matrix(y_test, y_pred))
['false-recurrence-events' 'false-recurrence-events'
'false-recurrence-events' 'false-recurrence-events'
'false-recurrence-events' 'false-recurrence-events'
'false-recurrence-events' 'false-recurrence-events'
'false-recurrence-events' 'false-recurrence-events'
'false-recurrence-events' 'false-recurrence-events' 'recurrence-events'
'false-recurrence-events' 'false-recurrence-events' 'recurrence-events'
'false-recurrence-events' 'false-recurrence-events'
'false-recurrence-events' 'recurrence-events' 'false-recurrence-events'
'recurrence-events' 'false-recurrence-events' 'recurrence-events'
'false-recurrence-events' 'false-recurrence-events'
'false-recurrence-events' 'false-recurrence-events'
'false-recurrence-events' 'false-recurrence-events'
'false-recurrence-events' 'false-recurrence-events' 'recurrence-events'
'recurrence-events' 'recurrence-events' 'false-recurrence-events'
'false-recurrence-events' 'false-recurrence-events'
'false-recurrence-events' 'recurrence-events' 'false-recurrence-events'
'false-recurrence-events' 'false-recurrence-events'
'false-recurrence-events' 'false-recurrence-events' 'recurrence-events'
'false-recurrence-events' 'false-recurrence-events'
'false-recurrence-events' 'false-recurrence-events'
'false-recurrence-events' 'false-recurrence-events'
'false-recurrence-events' 'false-recurrence-events' 'recurrence-events']
Akurasi KNN: 0.7272727272727273
[[35 6]
[ 9 5]]