UTS#

  1. Lakukan analisis klasifikasi dengan sumber data datasets/breast-cancer menggunakan Naive Bayes dan KNN untuk data tersebut.

  2. Jelaskan proses tersebut dengan menulisnya di blog GitHub yang telah Anda miliki (Jupyter Book).

  3. Upload alamat blog github anda di schoology

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
# Load the breast-cancer dataset from its raw CSV URL and preview the first rows
url = "https://raw.githubusercontent.com/datasets/breast-cancer/master/data/breast-cancer.csv"
df = pd.read_csv(url)
print(df.head(5))

# Turn every predictor column into numeric indicator columns (one-hot encoding);
# the 'class' column is left out of the list so it stays as the raw label.
categorical_columns = [
    'age',
    'mefalsepause',
    'tumor-size',
    'inv-falsedes',
    'falsede-caps',
    'deg-malig',
    'breast',
    'breast-quad',
    'irradiat',
]
df_encoded = pd.get_dummies(df, columns=categorical_columns)

# Separate the target label from the encoded predictors
y = df_encoded['class']
X = df_encoded.drop(columns='class')
     age mefalsepause tumor-size inv-falsedes falsede-caps  deg-malig breast  \
0  40-49   premefalse      15-19          0-2         True          3  right   
1  50-59         ge40      15-19          0-2        False          1  right   
2  50-59         ge40      35-39          0-2        False          2   left   
3  40-49   premefalse      35-39          0-2         True          3  right   
4  40-49   premefalse      30-34          3-5         True          2   left   

  breast-quad  irradiat                    class  
0     left_up     False        recurrence-events  
1     central     False  false-recurrence-events  
2    left_low     False        recurrence-events  
3    left_low      True  false-recurrence-events  
4    right_up     False        recurrence-events  
# Hold out 20% of the rows as a test set; the fixed seed makes the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Create the Gaussian Naive Bayes classifier
# NOTE(review): the predictors were one-hot encoded earlier, while GaussianNB
# models each feature with a Gaussian likelihood — confirm this variant is intended.
naive_bayes = GaussianNB()

# Fit the model on the training portion (left as a bare expression so a
# notebook cell still displays the fitted estimator)
naive_bayes.fit(X_train, y_train)
GaussianNB()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
# Predict the class label of every test row with the fitted Naive Bayes model,
# then print a header line followed by the predicted labels
y_pred = naive_bayes.predict(X_test)
print('prediksi :', y_pred, sep='\n')
prediksi :
['recurrence-events' 'recurrence-events' 'recurrence-events'
 'recurrence-events' 'recurrence-events' 'recurrence-events'
 'false-recurrence-events' 'recurrence-events' 'recurrence-events'
 'recurrence-events' 'recurrence-events' 'recurrence-events'
 'recurrence-events' 'recurrence-events' 'recurrence-events'
 'recurrence-events' 'recurrence-events' 'recurrence-events'
 'recurrence-events' 'recurrence-events' 'false-recurrence-events'
 'recurrence-events' 'recurrence-events' 'recurrence-events'
 'recurrence-events' 'recurrence-events' 'false-recurrence-events'
 'recurrence-events' 'recurrence-events' 'recurrence-events'
 'recurrence-events' 'false-recurrence-events' 'recurrence-events'
 'recurrence-events' 'recurrence-events' 'recurrence-events'
 'recurrence-events' 'recurrence-events' 'false-recurrence-events'
 'recurrence-events' 'false-recurrence-events' 'recurrence-events'
 'recurrence-events' 'false-recurrence-events' 'recurrence-events'
 'recurrence-events' 'recurrence-events' 'recurrence-events'
 'false-recurrence-events' 'recurrence-events' 'recurrence-events'
 'recurrence-events' 'recurrence-events' 'recurrence-events'
 'recurrence-events']
# Score the Naive Bayes predictions against the true test labels:
# overall accuracy first, then the confusion matrix
accuracy = accuracy_score(y_test, y_pred)
nb_confusion = confusion_matrix(y_test, y_pred)
print("Akurasi Naive Bayes:", accuracy)
print(nb_confusion)
Akurasi Naive Bayes: 0.32727272727272727
[[ 6 35]
 [ 2 12]]
# Build a 5-nearest-neighbours classifier and fit it on the training set
# (fit returns the estimator itself, so the calls can be chained)
knn = KNeighborsClassifier(n_neighbors=5).fit(X_train, y_train)

# Predict the class label of every test row; this rebinds y_pred, replacing
# the Naive Bayes predictions
y_pred = knn.predict(X_test)
print(y_pred)

# Score the KNN predictions: overall accuracy, then the confusion matrix
accuracy = accuracy_score(y_test, y_pred)
knn_confusion = confusion_matrix(y_test, y_pred)
print("Akurasi KNN:", accuracy)
print(knn_confusion)
['false-recurrence-events' 'false-recurrence-events'
 'false-recurrence-events' 'false-recurrence-events'
 'false-recurrence-events' 'false-recurrence-events'
 'false-recurrence-events' 'false-recurrence-events'
 'false-recurrence-events' 'false-recurrence-events'
 'false-recurrence-events' 'false-recurrence-events' 'recurrence-events'
 'false-recurrence-events' 'false-recurrence-events' 'recurrence-events'
 'false-recurrence-events' 'false-recurrence-events'
 'false-recurrence-events' 'recurrence-events' 'false-recurrence-events'
 'recurrence-events' 'false-recurrence-events' 'recurrence-events'
 'false-recurrence-events' 'false-recurrence-events'
 'false-recurrence-events' 'false-recurrence-events'
 'false-recurrence-events' 'false-recurrence-events'
 'false-recurrence-events' 'false-recurrence-events' 'recurrence-events'
 'recurrence-events' 'recurrence-events' 'false-recurrence-events'
 'false-recurrence-events' 'false-recurrence-events'
 'false-recurrence-events' 'recurrence-events' 'false-recurrence-events'
 'false-recurrence-events' 'false-recurrence-events'
 'false-recurrence-events' 'false-recurrence-events' 'recurrence-events'
 'false-recurrence-events' 'false-recurrence-events'
 'false-recurrence-events' 'false-recurrence-events'
 'false-recurrence-events' 'false-recurrence-events'
 'false-recurrence-events' 'false-recurrence-events' 'recurrence-events']
Akurasi KNN: 0.7272727272727273
[[35  6]
 [ 9  5]]