In [None]:
import numpy as np
import matplotlib.pyplot as plt
import xgboost as xgb
from sklearn.datasets import load_iris
from sklearn.tree import DecisionTreeClassifier, plot_tree
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report

In [None]:
# Fetch the iris dataset once and unpack the pieces the later cells rely on:
# the feature matrix, the target vector, and the human-readable labels.
iris = load_iris()
X = iris["data"]
y = iris["target"]
feature_names = iris["feature_names"]
class_names = iris["target_names"]

In [None]:
# Split data into training and testing sets (80% train / 20% test).
# stratify=y keeps the class proportions identical in both partitions, so the
# per-class precision/recall computed on the small test set are not skewed by
# an uneven random draw. random_state pins the shuffle for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

In [None]:
# Build and fit a single decision tree that scores splits with the Gini
# criterion; the fixed seed makes the fitted tree reproducible.
# fit() returns the estimator itself, so construction and training are chained.
dt_model = DecisionTreeClassifier(criterion='gini', random_state=42).fit(X_train, y_train)
dt_model

In [None]:
# Build and fit a 100-tree random forest using the Gini criterion, seeded so
# the ensemble is reproducible across runs.
rf_model = RandomForestClassifier(
    n_estimators=100,
    criterion='gini',
    random_state=42,
).fit(X_train, y_train)
rf_model

In [None]:
# Train an XGBoost classifier.
# `use_label_encoder` is deliberately not passed: the option was deprecated and
# then removed from XGBClassifier, and newer releases only emit an
# "unused parameter" warning when it is supplied.
# eval_metric='mlogloss' selects multiclass log-loss as the evaluation metric;
# random_state pins the seed for reproducibility.
xgb_model = XGBClassifier(eval_metric='mlogloss', random_state=42)
xgb_model.fit(X_train, y_train)

In [None]:
# Predict test-set labels with each fitted model, in the order
# decision tree -> random forest -> XGBoost.
dt_y_pred, rf_y_pred, xgb_y_pred = (
    fitted.predict(X_test) for fitted in (dt_model, rf_model, xgb_model)
)

In [None]:
# Score every model against the held-out labels.
# First the overall accuracy of each model ...
dt_accuracy, rf_accuracy, xgb_accuracy = (
    accuracy_score(y_test, y_hat)
    for y_hat in (dt_y_pred, rf_y_pred, xgb_y_pred)
)

# ... then the text classification report of each model, labelled with the
# iris class names.
dt_report, rf_report, xgb_report = (
    classification_report(y_test, y_hat, target_names=class_names)
    for y_hat in (dt_y_pred, rf_y_pred, xgb_y_pred)
)

In [None]:
# Print results: accuracy followed by the classification report, per model.
# The report is printed with its own print() call rather than as a second
# argument next to the heading: print's default sep=' ' would otherwise insert
# a stray space in front of the report's first (header) line and shift it out
# of alignment with the columns below it.
for label, accuracy, report in (
    ("Decision Tree", dt_accuracy, dt_report),
    ("Random Forest", rf_accuracy, rf_report),
    ("XGBoost", xgb_accuracy, xgb_report),
):
    print(f"{label} Accuracy: {accuracy:.2f}\n")
    print(f"{label} Classification Report:")
    print(report)

In [None]:
# Plot the decision tree with Gini index.
# An explicit Axes is created and handed to plot_tree so the drawing and the
# title are guaranteed to target the same axes instead of relying on pyplot's
# implicit "current figure" state.
fig, ax = plt.subplots(figsize=(12, 8))
plot_tree(
    dt_model,
    # Plain lists are passed for the labels: class_names is a NumPy array, and
    # some scikit-learn releases reject non-list values for these parameters.
    feature_names=list(feature_names),
    class_names=list(class_names),
    filled=True,    # colour nodes by majority class
    impurity=True,  # show the impurity value in every node
    ax=ax,
)
ax.set_title("Decision Tree Visualization with Gini Index")
plt.show()

In [None]:
# Collect the per-feature importance vector exposed by each fitted model
# (decision tree, random forest, XGBoost — in that order).
dt_importances, rf_importances, xgb_importances = (
    fitted.feature_importances_ for fitted in (dt_model, rf_model, xgb_model)
)

In [None]:
# Draw one horizontal bar chart per model, side by side, so the feature
# importances can be compared at a glance.
fig, axes = plt.subplots(1, 3, figsize=(15, 5))

# Each panel is described by (importance vector, panel title, bar colour).
for ax, (importances, title, color) in zip(
    axes,
    (
        (dt_importances, "Feature Importance (Decision Tree)", 'skyblue'),
        (rf_importances, "Feature Importance (Random Forest)", 'lightcoral'),
        (xgb_importances, "Feature Importance (XGBoost)", 'lightgreen'),
    ),
):
    ax.barh(feature_names, importances, color=color)
    ax.set_title(title)
    ax.set_xlabel("Importance")
    # Flip the y-axis so the first feature is listed at the top of the panel.
    ax.invert_yaxis()

plt.tight_layout()
plt.show()