Random Forest Regression
A tutorial on how to use Random Forest Regression.
- 0. Data Preprocessing
- 1. Training the Random Forest Regression model on the training set
- 2. Predicting a new result on the test set
- 3. Saving the model
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
# Load the housing dataset from the current working directory.
housing = pd.read_csv("housing.csv")
housing
# Count the missing values in each column before imputing.
housing.isna().sum()
# Median of the total_bedrooms column, used below as the fill value.
bedrooms_median = housing["total_bedrooms"].median()
bedrooms_median
# Assign the filled column back instead of calling fillna(inplace=True) on the
# column selection: that form acts on an intermediate object, triggers a
# chained-assignment warning, and leaves `housing` untouched under pandas
# copy-on-write.
housing["total_bedrooms"] = housing["total_bedrooms"].fillna(bedrooms_median)
# Re-check the per-column missing-value counts after the fill.
housing.isna().sum()
housing.info()
# Features: every column except the regression target.
X = housing.drop(columns=["median_house_value"])
X
# Target: the column the model is trained to predict.
y = housing.loc[:, "median_house_value"]
y
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder

# Columns holding categories that must be turned into numeric indicators.
categorical_features = ["ocean_proximity"]
one_hot = OneHotEncoder()
# One-hot encode the categorical columns; all remaining columns pass through
# unchanged.
transformer = ColumnTransformer(
    [("one_hot", one_hot, categorical_features)],
    remainder="passthrough",
)
transformed_X = transformer.fit_transform(X)
# Wrap the transformed matrix in a DataFrame for inspection.
pd.DataFrame(transformed_X)
from sklearn.model_selection import train_test_split

# Hold out 25% of the rows for evaluation; the fixed seed keeps the split
# identical between runs.
x_train, x_test, y_train, y_test = train_test_split(
    transformed_X,
    y,
    test_size=0.25,
    random_state=2509,
)
from sklearn.ensemble import RandomForestRegressor

# Seed the forest with the same value used for the train/test split so the
# fitted model, and therefore the score below, is reproducible between runs.
model = RandomForestRegressor(random_state=2509)
model.fit(x_train, y_train)
# Evaluate on the held-out test set (for regressors, score() is R^2).
model.score(x_test, y_test)
# Predict on the held-out features.
y_preds = model.predict(x_test)
# Put the true and predicted values side by side.
df = pd.DataFrame({"actual values": y_test, "predicted values": y_preds})
# Signed error: positive where the prediction exceeds the actual value.
df["differences"] = df["predicted values"].sub(df["actual values"])
df
import pickle

# Save the existing model to file. The context manager guarantees the file is
# flushed and closed even if pickling raises.
with open("random_forest_model.pkl", "wb") as model_file:
    pickle.dump(model, model_file)