Naive Bayes
Using Naive Bayes to detect whether an email is spam or not.
- 0. Data preprocessing
- 1. Training the Naive Bayes model on the Training set
- 2. Making the Confusion Matrix
- 3. Compare both models
- 4. Save the model
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Load the dataset from 'emails.csv' in the current working directory.
# Later code reads its 'text' column (message body) and 'spam' column (label).
email = pd.read_csv('emails.csv')
# The bare expressions below have no effect when run as a plain script;
# presumably they were notebook cells used to eyeball the data -- confirm.
email
email.head()  # first rows
len(email)  # number of rows
email.isna().sum()  # missing-value count per column
email.shape  # (rows, columns)
email.info()  # column dtypes and non-null counts
import re
import nltk
nltk.download('stopwords')
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer
# Build `corpus`: one cleaned, stemmed, space-joined string per email.
#
# The compiled pattern, the stemmer and the stop-word set never change between
# emails, so they are created once here instead of once per email (or, for the
# stop-word set, once per *word*, which made the loop needlessly slow).
non_letters = re.compile(r'[^a-zA-Z]')
ps = PorterStemmer()
stop_words = set(stopwords.words('english'))

corpus = []
# Iterate over the column values directly so the loop does not rely on the
# DataFrame having a default 0..n-1 index.
for text in email['text']:
    # Replace every non-letter with a space, then lowercase before splitting:
    # the NLTK English stop-word list is lowercase, so without this step
    # capitalised stop words ("The", "And", ...) would slip through the filter.
    e_mail = non_letters.sub(' ', text).lower().split()
    # Drop stop words and reduce the remaining words to their stems.
    e_mail = [ps.stem(word) for word in e_mail if word not in stop_words]
    corpus.append(' '.join(e_mail))
# Spot-check one cleaned email; assumes the dataset has more than 2509 rows.
corpus[2509]
from sklearn.feature_extraction.text import CountVectorizer
# Bag-of-words features: learn the vocabulary from the cleaned corpus, count
# term occurrences per email, then expand the result into a dense array.
cv = CountVectorizer()
term_counts = cv.fit_transform(corpus)
X = term_counts.toarray()
# Target labels come from the 'spam' column.
y = email['spam']
# Size checks; these bare expressions only show output in an interactive session.
len(X)
X.shape
len(y)
from sklearn.model_selection import train_test_split
# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
(X_train, X_test,
 y_train, y_test) = train_test_split(X, y, test_size=0.20, random_state=2509)
from sklearn.naive_bayes import GaussianNB
# Gaussian Naive Bayes: fit on the training split (fit() returns the estimator
# itself), then predict and score on the held-out test split.
GN_classifier = GaussianNB().fit(X_train, y_train)
y_GN_pred = GN_classifier.predict(X_test)  # predicted labels for the test set
GN_score = GN_classifier.score(X_test, y_test)  # mean accuracy on the test set
GN_score
from sklearn.naive_bayes import MultinomialNB
# Multinomial Naive Bayes: fit on the training split (fit() returns the
# estimator itself), then predict and score on the held-out test split.
MN_classifier = MultinomialNB().fit(X_train, y_train)
y_MN_pred = MN_classifier.predict(X_test)  # predicted labels for the test set
MN_score = MN_classifier.score(X_test, y_test)  # mean accuracy on the test set
MN_score
from sklearn.metrics import confusion_matrix
# Confusion matrix per model: rows are the true labels, columns the predictions.
GN_cm = confusion_matrix(y_true=y_test, y_pred=y_GN_pred)
print(GN_cm)
from sklearn.metrics import confusion_matrix
MN_cm = confusion_matrix(y_true=y_test, y_pred=y_MN_pred)
print(MN_cm)
# Put both test-set scores in a one-row frame (one column per model).
scores = {"GaussianNB": GN_score, "MultinomialNB": MN_score}
models = pd.DataFrame(scores, index=[0])
# Transpose so each model becomes one bar on the x-axis.
# NOTE(review): the title is misspelled ("Comapre"); left unchanged here.
score_axes = models.T.plot.bar(title="Comapre different models", legend=False)
# Keep the model names horizontal on the x-axis.
plt.xticks(rotation=0);
import pickle
# Persist the trained MultinomialNB model. The `with` block guarantees the
# file is flushed and closed even if pickling raises.
# NOTE(review): the fitted CountVectorizer (`cv`) is not saved here, so new
# emails cannot be turned into features for this model without it -- consider
# persisting it alongside the classifier.
with open("Email_spam_naive_bayes_MN.pkl", "wb") as model_file:
    pickle.dump(MN_classifier, model_file)