fork download
  1. from sklearn import datasets
  2.  
  3. # Loading the iris plants dataset (classification)
  4. iris = datasets.load_iris()
  5. print(iris.target_names)
  6. print(iris.feature_names)
  7. # dividing the datasets into two parts i.e. training datasets and test datasets
  8. X, y = datasets.load_iris( return_X_y = True)
  9.  
  10. # Splitting arrays or matrices into random train and test subsets
  11. from sklearn.model_selection import train_test_split
  12. # i.e. 70 % training dataset and 30 % test datasets
  13. X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.30)
  14. # importing random forest classifier from assemble module
  15. from sklearn.ensemble import RandomForestClassifier
  16. import pandas as pd
  17. # creating dataframe of IRIS dataset
  18. data = pd.DataFrame({'sepallength': iris.data[:, 0], 'sepalwidth': iris.data[:, 1],
  19. 'petallength': iris.data[:, 2], 'petalwidth': iris.data[:, 3],
  20. 'species': iris.target})
  21. # creating a RF classifier
  22. clf = RandomForestClassifier(n_estimators = 100)
  23.  
  24. # Training the model on the training dataset
  25. # fit function is used to train the model using the training sets as parameters
  26. clf.fit(X_train, y_train)
  27.  
  28. # performing predictions on the test dataset
  29. y_pred = clf.predict(X_test)
  30.  
  31. # metrics are used to find accuracy or error
  32. from sklearn import metrics
  33. print()
  34.  
  35. # using metrics module for accuracy calculation
  36. print("ACCURACY OF THE MODEL:", metrics.accuracy_score(y_test, y_pred))
  37. # predicting which type of flower it is.
  38. clf.predict([[5, 4, 1, 1]])
Success #stdin #stdout 0.99s 113812KB
stdin
Standard input is empty
stdout
['setosa' 'versicolor' 'virginica']
['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)', 'petal width (cm)']

ACCURACY OF THE MODEL: 0.9555555555555556