import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import GridSearchCV

# import the dataset
df = pd.read_csv('data/pokedex.csv')
# Print head
df.head()

# Print info such as data types and number of non-null values
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1045 entries, 0 to 1044
Data columns (total 51 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   Unnamed: 0        1045 non-null   int64  
 1   pokedex_number    1045 non-null   int64  
 2   name              1045 non-null   object 
 3   german_name       1045 non-null   object 
 4   japanese_name     1045 non-null   object 
 5   generation        1045 non-null   int64  
 6   status            1045 non-null   object 
 7   species           1045 non-null   object 
 8   type_number       1045 non-null   int64  
 9   type_1            1045 non-null   object 
 10  type_2            553 non-null    object 
 11  height_m          1045 non-null   float64
 12  weight_kg         1044 non-null   float64
 13  abilities_number  1045 non-null   int64  
 14  ability_1         1042 non-null   object 
 15  ability_2         516 non-null    object 
 16  ability_hidden    813 non-null    object 
 17  total_points      1045 non-null   int64  
 18  hp                1045 non-null   int64  
 19  attack            1045 non-null   int64  
 20  defense           1045 non-null   int64  
 21  sp_attack         1045 non-null   int64  
 22  sp_defense        1045 non-null   int64  
 23  speed             1045 non-null   int64  
 24  catch_rate        1027 non-null   float64
 25  base_friendship   930 non-null    float64
 26  base_experience   925 non-null    float64
 27  growth_rate       1044 non-null   object 
 28  egg_type_number   1045 non-null   int64  
 29  egg_type_1        1042 non-null   object 
 30  egg_type_2        285 non-null    object 
 31  percentage_male   872 non-null    float64
 32  egg_cycles        1044 non-null   float64
 33  against_normal    1045 non-null   float64
 34  against_fire      1045 non-null   float64
 35  against_water     1045 non-null   float64
 36  against_electric  1045 non-null   float64
 37  against_grass     1045 non-null   float64
 38  against_ice       1045 non-null   float64
 39  against_fight     1045 non-null   float64
 40  against_poison    1045 non-null   float64
 41  against_ground    1045 non-null   float64
 42  against_flying    1045 non-null   float64
 43  against_psychic   1045 non-null   float64
 44  against_bug       1045 non-null   float64
 45  against_rock      1045 non-null   float64
 46  against_ghost     1045 non-null   float64
 47  against_dragon    1045 non-null   float64
 48  against_dark      1045 non-null   float64
 49  against_steel     1045 non-null   float64
 50  against_fairy     1045 non-null   float64
dtypes: float64(25), int64(13), object(13)
memory usage: 416.5+ KB

# Print summary statistics of numeric types
df.describe()

# show all columns with non-numeric type
object_columns = df.select_dtypes(include=['object']).columns

# Print the selected columns
print(object_columns)

Index(['name', 'german_name', 'japanese_name', 'status', 'species', 'type_1',
       'type_2', 'ability_1', 'ability_2', 'ability_hidden', 'growth_rate',
       'egg_type_1', 'egg_type_2'],
      dtype='object')

# Drop the names and index numbers
df.drop(['name', 'Unnamed: 0', 'german_name', 'japanese_name', 'pokedex_number'], axis=1, inplace=True)
# Print the head of the dataframe
df.head()

# Drop the species
df.drop(['species'], axis=1, inplace=True)
# Print the head of the dataframe
df.head()

#Verify that the last 18 columns are the against_? data
print(df.columns[-18:])
# Drop the species
df.drop(df.columns[-18:], axis=1, inplace=True)
df.drop('type_number', axis=1, inplace=True)
# Print the head of the dataframe
df.head()

Index(['against_normal', 'against_fire', 'against_water', 'against_electric',
       'against_grass', 'against_ice', 'against_fight', 'against_poison',
       'against_ground', 'against_flying', 'against_psychic', 'against_bug',
       'against_rock', 'against_ghost', 'against_dragon', 'against_dark',
       'against_steel', 'against_fairy'],
      dtype='object')

# Check for missing values
print(df.isnull().sum())

generation            0
status                0
type_1                0
type_2              492
height_m              0
weight_kg             1
abilities_number      0
ability_1             3
ability_2           529
ability_hidden      232
total_points          0
hp                    0
attack                0
defense               0
sp_attack             0
sp_defense            0
speed                 0
catch_rate           18
base_friendship     115
base_experience     120
growth_rate           1
egg_type_number       0
egg_type_1            3
egg_type_2          760
percentage_male     173
egg_cycles            1
dtype: int64

print("Missing weight_kg: ",  np.where(df['weight_kg'].isnull()))

Missing weight_kg:  (array([1033], dtype=int64),)

print("Missing ability_1: ",  np.where(df['ability_1'].isnull()))

Missing ability_1:  (array([  33,  172, 1033], dtype=int64),)

# Drop catch_rate, base_friendship and base_experience
df.drop(['catch_rate', 'base_friendship', 'base_experience', 'percentage_male'], axis=1, inplace=True)

# Print the head of the dataframe
df.head()

print("Missing growth_rate: ",  np.where(df['growth_rate'].isnull()))

Missing growth_rate:  (array([658], dtype=int64),)

print("Missing egg_type_1: ",  np.where(df['egg_type_1'].isnull()))

Missing egg_type_1:  (array([ 33, 172, 658], dtype=int64),)

print(np.where(df['egg_cycles'].isnull()))

(array([658], dtype=int64),)

#Partner Pikachu
print("Partner Pikachu ability_1 : ", df['ability_1'][33]) #to show Partner Pikachu's ability_1 is nan
df['ability_1'][33] = df['ability_1'][32]                  #Parner Pikachu's ability_1 is the same as regular Pikachu (row 32)
print("(Updated) Partner Pikachu ability_1 : ", df['ability_1'][33]) #to show Partner Pikachu's ability_1 now has the correct value

print("Partner Pikachu egg_type_number : ", df['egg_type_number'][33]) #nan
df['egg_type_number'][33] = df['egg_type_number'][32]                  #it's the same as regular Pikachu (row 32)
print("(Updated) Partner Pikachu egg_type_number : ", df['egg_type_number'][33]) #now has the correct value

print("Partner Pikachu egg_type_1 : ", df['egg_type_1'][33]) #nan
df['egg_type_1'][33] = df['egg_type_1'][32]                  #it's the same as regular Pikachu (row 32)
print("(Updated) Partner Pikachu egg_type_1 : ", df['egg_type_1'][33]) #now has the correct value

#(Partner) Pikachu has two egg types
print("Partner Pikachu egg_type_2 : ", df['egg_type_2'][33]) #nan
df['egg_type_2'][33] = df['egg_type_2'][32]                  #it's the same as regular Pikachu (row 32)
print("(Updated) Partner Pikachu egg_type_2 : ", df['egg_type_2'][33]) #now has the correct value

Partner Pikachu ability_1 :  nan
(Updated) Partner Pikachu ability_1 :  Static
Partner Pikachu egg_type_number :  0
(Updated) Partner Pikachu egg_type_number :  2
Partner Pikachu egg_type_1 :  nan
(Updated) Partner Pikachu egg_type_1 :  Fairy
Partner Pikachu egg_type_2 :  nan
(Updated) Partner Pikachu egg_type_2 :  Field

C:\Users\thors\AppData\Local\Temp\ipykernel_20284\1639035027.py:3: FutureWarning: ChainedAssignmentError: behaviour will change in pandas 3.0!
You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

  df['ability_1'][33] = df['ability_1'][32]                  #Parner Pikachu's ability_1 is the same as regular Pikachu (row 32)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\1639035027.py:3: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['ability_1'][33] = df['ability_1'][32]                  #Parner Pikachu's ability_1 is the same as regular Pikachu (row 32)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\1639035027.py:7: FutureWarning: ChainedAssignmentError: behaviour will change in pandas 3.0!
You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

  df['egg_type_number'][33] = df['egg_type_number'][32]                  #it's the same as regular Pikachu (row 32)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\1639035027.py:7: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['egg_type_number'][33] = df['egg_type_number'][32]                  #it's the same as regular Pikachu (row 32)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\1639035027.py:11: FutureWarning: ChainedAssignmentError: behaviour will change in pandas 3.0!
You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

  df['egg_type_1'][33] = df['egg_type_1'][32]                  #it's the same as regular Pikachu (row 32)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\1639035027.py:11: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['egg_type_1'][33] = df['egg_type_1'][32]                  #it's the same as regular Pikachu (row 32)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\1639035027.py:16: FutureWarning: ChainedAssignmentError: behaviour will change in pandas 3.0!
You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

  df['egg_type_2'][33] = df['egg_type_2'][32]                  #it's the same as regular Pikachu (row 32)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\1639035027.py:16: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['egg_type_2'][33] = df['egg_type_2'][32]                  #it's the same as regular Pikachu (row 32)

#Partner Eevee
print("Partner Eevee ability_1 : ", df['ability_1'][172])           #to show Partner Eevee's ability_1 is nan
df['ability_1'][172] = df['ability_1'][171]                         #Parner Eevee's ability_1 is the same as regular Eevee (row 171)
print("(Updated) Partner Eevee ability_1 : ", df['ability_1'][172]) #to show Partner Pikachu's ability_1 now has the correct value

print("Partner Eevee egg_type_number : ", df['egg_type_number'][172])           #nan
df['egg_type_number'][172] = df['egg_type_number'][171]                         #it's the same as regular Eevee (row 171)
print("(Updated) Partner Eevee egg_type_number : ", df['egg_type_number'][172]) #now has the correct value

print("Partner Eevee egg_type_1 : ", df['egg_type_1'][172])           #nan
df['egg_type_1'][172] = df['egg_type_1'][171]                         #it's the same as regular Eevee (row 171)
print("(Updated) Partner Eevee egg_type_1 : ", df['egg_type_1'][172]) #now has the correct value

#(Partner) Eevee only has one egg type, so this won't change anything
print("Partner Eevee egg_type_2 : ", df['egg_type_2'][172])           #nan
df['egg_type_2'][172] = df['egg_type_2'][171]                         #it's the same as regular Eevee (row 171)
print("(Updated) Partner Eevee egg_type_2 : ", df['egg_type_2'][172]) #now has the correct value, still nan

Partner Eevee ability_1 :  nan
(Updated) Partner Eevee ability_1 :  Run Away
Partner Eevee egg_type_number :  0
(Updated) Partner Eevee egg_type_number :  1
Partner Eevee egg_type_1 :  nan
(Updated) Partner Eevee egg_type_1 :  Field
Partner Eevee egg_type_2 :  nan
(Updated) Partner Eevee egg_type_2 :  nan

C:\Users\thors\AppData\Local\Temp\ipykernel_20284\3308900266.py:3: FutureWarning: ChainedAssignmentError: behaviour will change in pandas 3.0!
You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

  df['ability_1'][172] = df['ability_1'][171]                         #Parner Eevee's ability_1 is the same as regular Eevee (row 171)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\3308900266.py:3: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['ability_1'][172] = df['ability_1'][171]                         #Parner Eevee's ability_1 is the same as regular Eevee (row 171)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\3308900266.py:7: FutureWarning: ChainedAssignmentError: behaviour will change in pandas 3.0!
You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

  df['egg_type_number'][172] = df['egg_type_number'][171]                         #it's the same as regular Eevee (row 171)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\3308900266.py:7: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['egg_type_number'][172] = df['egg_type_number'][171]                         #it's the same as regular Eevee (row 171)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\3308900266.py:11: FutureWarning: ChainedAssignmentError: behaviour will change in pandas 3.0!
You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

  df['egg_type_1'][172] = df['egg_type_1'][171]                         #it's the same as regular Eevee (row 171)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\3308900266.py:11: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['egg_type_1'][172] = df['egg_type_1'][171]                         #it's the same as regular Eevee (row 171)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\3308900266.py:16: FutureWarning: ChainedAssignmentError: behaviour will change in pandas 3.0!
You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

  df['egg_type_2'][172] = df['egg_type_2'][171]                         #it's the same as regular Eevee (row 171)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\3308900266.py:16: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['egg_type_2'][172] = df['egg_type_2'][171]                         #it's the same as regular Eevee (row 171)

#Galarian Darmanitan Zen Mode
print("Galarian Darmanitan Zen Mode growth_rate : ", df['growth_rate'][658]) #to show Galarian Darmanitan Zen Mode's growth_rate is nan
df['growth_rate'][658] = df['growth_rate'][656]                              #Galarian Darmanitan Zen Mode's growth_rate is the same as every 
                                                                             #other form of Darmanitan (rows 655, 656, 657), Medium Slow
print("(Updated) Galarian Darmanitan Zen Mode growth_rate : ", df['growth_rate'][658]) #to show Galarian Darmanitan Zen Mode's growth_rate 
                                                                                       #now has the correct value

print("Galarian Darmanitan Zen Mode egg_type_number : ", df['egg_type_number'][658])           #nan
df['egg_type_number'][658] = df['egg_type_number'][656]  #it's the same as any other form of Darmanitan (rows 655, 656, 657)
print("(Updated) Galarian Darmanitan Zen Mode egg_type_number : ", df['egg_type_number'][658]) #now has the correct value

print("Galarian Darmanitan Zen Mode egg_type_1 : ", df['egg_type_1'][658])       #nan
df['egg_type_1'][658] = df['egg_type_1'][656]  #it's the same as any other form of Darmanitan (rows 655, 656, 657)
print("(Updated) Galarian Darmanitan Zen Mode egg_type_1 : ", df['egg_type_1'][658]) #now has the correct value

#(Galarian) Darmanitan (Zen Mode) only has one egg type, so this won't change anything
print("Galarian Darmanitan Zen Mode egg_type_2 : ", df['egg_type_2'][658])        #nan
df['egg_type_2'][658] = df['egg_type_2'][656]  #it's the same as any other form of Darmanitan (rows 655, 656, 657)
print("(Updated) Galarian Darmanitan Zen Mode egg_type_2 : ", df['egg_type_2'][658]) #now has the correct value, still nan

print("Galarian Darmanitan Zen Mode egg_cycles : ", df['egg_cycles'][658]) #nan
df['egg_cycles'][658] = df['egg_cycles'][656]  #it's the same as any other form of Darmanitan (rows 655, 656, 657)
print("(Updated) Galarian Darmanitan Zen Mode egg_cycles : ", df['egg_cycles'][658]) #now has the correct value

Galarian Darmanitan Zen Mode growth_rate :  nan
(Updated) Galarian Darmanitan Zen Mode growth_rate :  Medium Slow
Galarian Darmanitan Zen Mode egg_type_number :  0
(Updated) Galarian Darmanitan Zen Mode egg_type_number :  1
Galarian Darmanitan Zen Mode egg_type_1 :  nan
(Updated) Galarian Darmanitan Zen Mode egg_type_1 :  Field
Galarian Darmanitan Zen Mode egg_type_2 :  nan
(Updated) Galarian Darmanitan Zen Mode egg_type_2 :  nan
Galarian Darmanitan Zen Mode egg_cycles :  nan
(Updated) Galarian Darmanitan Zen Mode egg_cycles :  20.0

C:\Users\thors\AppData\Local\Temp\ipykernel_20284\1102097238.py:3: FutureWarning: ChainedAssignmentError: behaviour will change in pandas 3.0!
You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

  df['growth_rate'][658] = df['growth_rate'][656]                              #Galarian Darmanitan Zen Mode's growth_rate is the same as every
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\1102097238.py:3: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['growth_rate'][658] = df['growth_rate'][656]                              #Galarian Darmanitan Zen Mode's growth_rate is the same as every
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\1102097238.py:9: FutureWarning: ChainedAssignmentError: behaviour will change in pandas 3.0!
You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

  df['egg_type_number'][658] = df['egg_type_number'][656]  #it's the same as any other form of Darmanitan (rows 655, 656, 657)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\1102097238.py:9: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['egg_type_number'][658] = df['egg_type_number'][656]  #it's the same as any other form of Darmanitan (rows 655, 656, 657)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\1102097238.py:13: FutureWarning: ChainedAssignmentError: behaviour will change in pandas 3.0!
You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

  df['egg_type_1'][658] = df['egg_type_1'][656]  #it's the same as any other form of Darmanitan (rows 655, 656, 657)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\1102097238.py:13: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['egg_type_1'][658] = df['egg_type_1'][656]  #it's the same as any other form of Darmanitan (rows 655, 656, 657)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\1102097238.py:18: FutureWarning: ChainedAssignmentError: behaviour will change in pandas 3.0!
You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

  df['egg_type_2'][658] = df['egg_type_2'][656]  #it's the same as any other form of Darmanitan (rows 655, 656, 657)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\1102097238.py:18: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['egg_type_2'][658] = df['egg_type_2'][656]  #it's the same as any other form of Darmanitan (rows 655, 656, 657)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\1102097238.py:22: FutureWarning: ChainedAssignmentError: behaviour will change in pandas 3.0!
You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

  df['egg_cycles'][658] = df['egg_cycles'][656]  #it's the same as any other form of Darmanitan (rows 655, 656, 657)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\1102097238.py:22: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['egg_cycles'][658] = df['egg_cycles'][656]  #it's the same as any other form of Darmanitan (rows 655, 656, 657)

df.drop(index=1033, axis=0, inplace=True)

# Check for missing values
print(df.isnull().sum())

generation            0
status                0
type_1                0
type_2              492
height_m              0
weight_kg             0
abilities_number      0
ability_1             0
ability_2           528
ability_hidden      231
total_points          0
hp                    0
attack                0
defense               0
sp_attack             0
sp_defense            0
speed                 0
growth_rate           0
egg_type_number       0
egg_type_1            0
egg_type_2          758
egg_cycles            0
dtype: int64

df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 1044 entries, 0 to 1044
Data columns (total 22 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   generation        1044 non-null   int64  
 1   status            1044 non-null   object 
 2   type_1            1044 non-null   object 
 3   type_2            552 non-null    object 
 4   height_m          1044 non-null   float64
 5   weight_kg         1044 non-null   float64
 6   abilities_number  1044 non-null   int64  
 7   ability_1         1044 non-null   object 
 8   ability_2         516 non-null    object 
 9   ability_hidden    813 non-null    object 
 10  total_points      1044 non-null   int64  
 11  hp                1044 non-null   int64  
 12  attack            1044 non-null   int64  
 13  defense           1044 non-null   int64  
 14  sp_attack         1044 non-null   int64  
 15  sp_defense        1044 non-null   int64  
 16  speed             1044 non-null   int64  
 17  growth_rate       1044 non-null   object 
 18  egg_type_number   1044 non-null   int64  
 19  egg_type_1        1044 non-null   object 
 20  egg_type_2        286 non-null    object 
 21  egg_cycles        1044 non-null   float64
dtypes: float64(3), int64(10), object(9)
memory usage: 187.6+ KB

# Plot the correlation matrix
sns.heatmap(df.select_dtypes(include=[np.number, bool]).corr(), square=True, cmap='RdYlGn');

['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b', '#e377c2', '#7f7f7f', '#bcbd22', '#17becf']
['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b', '#e377c2', '#7f7f7f', '#bcbd22', '#17becf']
['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b', '#e377c2', '#7f7f7f', '#bcbd22', '#17becf']
['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b', '#e377c2', '#7f7f7f', '#bcbd22', '#17becf']
['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b', '#e377c2', '#7f7f7f', '#bcbd22', '#17becf']
['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b', '#e377c2', '#7f7f7f', '#bcbd22', '#17becf']
['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b', '#e377c2', '#7f7f7f', '#bcbd22', '#17becf']
['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b', '#e377c2', '#7f7f7f', '#bcbd22', '#17becf']
['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b', '#e377c2', '#7f7f7f', '#bcbd22', '#17becf']
['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b', '#e377c2', '#7f7f7f', '#bcbd22', '#17becf']
['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b', '#e377c2', '#7f7f7f', '#bcbd22', '#17becf']
['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b', '#e377c2', '#7f7f7f', '#bcbd22', '#17becf']
['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b', '#e377c2', '#7f7f7f', '#bcbd22', '#17becf']
['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b', '#e377c2', '#7f7f7f', '#bcbd22', '#17becf']
['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b', '#e377c2', '#7f7f7f', '#bcbd22', '#17becf']
['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b', '#e377c2', '#7f7f7f', '#bcbd22', '#17becf']

# Handling missing values in 'Type 2' column
df['type_2'].fillna('None', inplace=True)
df['ability_2'].fillna('None', inplace=True)
df['ability_hidden'].fillna('None', inplace=True)
df['egg_type_2'].fillna('None', inplace=True)

print(df.isnull().sum())

df.head()

generation          0
status              0
type_1              0
type_2              0
height_m            0
weight_kg           0
abilities_number    0
ability_1           0
ability_2           0
ability_hidden      0
total_points        0
hp                  0
attack              0
defense             0
sp_attack           0
sp_defense          0
speed               0
growth_rate         0
egg_type_number     0
egg_type_1          0
egg_type_2          0
egg_cycles          0
dtype: int64

C:\Users\thors\AppData\Local\Temp\ipykernel_20284\1302662185.py:2: FutureWarning: A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['type_2'].fillna('None', inplace=True)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\1302662185.py:3: FutureWarning: A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['ability_2'].fillna('None', inplace=True)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\1302662185.py:4: FutureWarning: A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['ability_hidden'].fillna('None', inplace=True)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\1302662185.py:5: FutureWarning: A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['egg_type_2'].fillna('None', inplace=True)

df_one_hot = pd.get_dummies(df.drop(['type_1', 'type_2'], axis=1))
df_one_hot.head() # to check if it worked

df = pd.concat([df_one_hot, df[['type_1', 'type_2']]], axis=1)
df.head()
preprocessed_df = df.copy()

df = preprocessed_df.copy()

# Separate features and labels
X = df.drop(columns=['type_1', 'type_2'])

# Combine Type 1 and Type 2 into a single column
df['Types'] = df[['type_1', 'type_2']].apply(lambda x: tuple(filter(lambda y: pd.notna(y), x)), axis=1)
df.Types = df.Types.astype(str)

print(df['Types'][0])
print(len(df['Types'].unique()))


# drop the Type 1 and Type 2 columns
df.drop(['type_1', 'type_2'], axis=1, inplace=True)

# print head
df.head()

('Grass', 'Poison')
192

# show the distribution of pokemon types
sns.countplot(df, y='Types');

['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b', '#e377c2', '#7f7f7f', '#bcbd22', '#17becf']
['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b', '#e377c2', '#7f7f7f', '#bcbd22', '#17becf']
['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b', '#e377c2', '#7f7f7f', '#bcbd22', '#17becf']
['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b', '#e377c2', '#7f7f7f', '#bcbd22', '#17becf']
['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b', '#e377c2', '#7f7f7f', '#bcbd22', '#17becf']

singleton_classes = df['Types'].value_counts()[df['Types'].value_counts() == 1].index.tolist()
singleton_data = df[df['Types'].isin(singleton_classes)]
other_data = df[~df['Types'].isin(singleton_classes)]

print("Number of singleton classes",len(singleton_classes))
print("number of unique type combinations",len(df['Types'].unique()))
print(len(df['Types']))
df.head()

Number of singleton classes 41
number of unique type combinations 192
1044

from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
# Split the data into training and testing sets
from sklearn.metrics import classification_report
X = df.drop(columns=['Types'])
y = df['Types']

X_train, X_test, y_train, y_test = train_test_split(other_data.drop(columns=['Types']), other_data['Types'], test_size=0.2, stratify=other_data['Types'], random_state=42)
X_train = pd.concat([X_train, singleton_data.drop(columns=['Types'])])
y_train = pd.concat([y_train, singleton_data['Types']])
X_test = pd.concat([X_test, singleton_data.drop(columns=['Types'])])
y_test = pd.concat([y_test, singleton_data['Types']])

print("actual test size:",len(X_test)/(len(X_train)+len(X_test)))

actual test size: 0.22304147465437787

# Initialize and train the decision tree classifier
from sklearn.metrics import accuracy_score
model = DecisionTreeClassifier(random_state=42)
model.fit(X_train, y_train)

# Predict labels for the test set
y_pred = model.predict(X_test)

# Calculate accuracy
print("Score: ", model.score(X_test, y_test))
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)
print(classification_report(y_test, y_pred))

Score:  0.4834710743801653
Accuracy: 0.4834710743801653
                         precision    recall  f1-score   support

    ('Bug', 'Electric')       0.00      0.00      0.00         1
    ('Bug', 'Fighting')       0.00      0.00      0.00         1
      ('Bug', 'Flying')       0.00      0.00      0.00         3
       ('Bug', 'Ghost')       0.50      1.00      0.67         1
       ('Bug', 'Grass')       0.00      0.00      0.00         1
        ('Bug', 'None')       0.40      0.50      0.44         4
      ('Bug', 'Poison')       0.00      0.00      0.00         3
        ('Bug', 'Rock')       0.00      0.00      0.00         1
       ('Bug', 'Steel')       0.00      0.00      0.00         1
       ('Bug', 'Water')       0.00      0.00      0.00         1
     ('Dark', 'Dragon')       0.00      0.00      0.00         1
      ('Dark', 'Fairy')       1.00      1.00      1.00         1
       ('Dark', 'Fire')       0.00      0.00      0.00         1
     ('Dark', 'Flying')       0.00      0.00      0.00         1
      ('Dark', 'Ghost')       0.00      0.00      0.00         0
      ('Dark', 'Grass')       1.00      1.00      1.00         1
       ('Dark', 'None')       0.00      0.00      0.00         3
     ('Dark', 'Normal')       0.00      0.00      0.00         1
      ('Dark', 'Steel')       0.00      0.00      0.00         0
 ('Dragon', 'Electric')       1.00      1.00      1.00         1
    ('Dragon', 'Fairy')       1.00      1.00      1.00         1
     ('Dragon', 'Fire')       1.00      1.00      1.00         1
   ('Dragon', 'Flying')       0.00      0.00      0.00         1
    ('Dragon', 'Ghost')       1.00      1.00      1.00         1
   ('Dragon', 'Ground')       0.00      0.00      0.00         2
      ('Dragon', 'Ice')       0.50      1.00      0.67         1
     ('Dragon', 'None')       0.00      0.00      0.00         3
  ('Dragon', 'Psychic')       1.00      1.00      1.00         1
   ('Electric', 'Dark')       0.00      0.00      0.00         0
  ('Electric', 'Fairy')       0.00      0.00      0.00         0
   ('Electric', 'Fire')       0.33      1.00      0.50         1
 ('Electric', 'Flying')       0.00      0.00      0.00         1
  ('Electric', 'Ghost')       1.00      1.00      1.00         1
  ('Electric', 'Grass')       0.00      0.00      0.00         1
   ('Electric', 'None')       0.50      0.43      0.46         7
 ('Electric', 'Poison')       0.00      0.00      0.00         1
('Electric', 'Psychic')       1.00      1.00      1.00         1
  ('Electric', 'Steel')       0.00      0.00      0.00         1
  ('Electric', 'Water')       0.00      0.00      0.00         1
    ('Fairy', 'Flying')       0.00      0.00      0.00         0
      ('Fairy', 'None')       0.33      0.25      0.29         4
     ('Fairy', 'Steel')       0.50      1.00      0.67         1
   ('Fighting', 'Dark')       0.00      0.00      0.00         0
 ('Fighting', 'Flying')       0.00      0.00      0.00         0
  ('Fighting', 'Ghost')       1.00      1.00      1.00         1
    ('Fighting', 'Ice')       1.00      1.00      1.00         1
   ('Fighting', 'None')       0.67      0.33      0.44         6
('Fighting', 'Psychic')       0.00      0.00      0.00         1
  ('Fighting', 'Steel')       0.00      0.00      0.00         1
  ('Fighting', 'Water')       0.00      0.00      0.00         1
        ('Fire', 'Bug')       0.00      0.00      0.00         0
       ('Fire', 'Dark')       1.00      1.00      1.00         1
   ('Fire', 'Fighting')       1.00      1.00      1.00         1
     ('Fire', 'Flying')       0.00      0.00      0.00         2
     ('Fire', 'Ground')       0.00      0.00      0.00         1
       ('Fire', 'None')       0.33      0.57      0.42         7
       ('Fire', 'Rock')       1.00      1.00      1.00         1
      ('Fire', 'Steel')       1.00      1.00      1.00         1
      ('Fire', 'Water')       1.00      1.00      1.00         1
     ('Flying', 'None')       1.00      1.00      1.00         1
    ('Flying', 'Steel')       1.00      1.00      1.00         1
    ('Flying', 'Water')       1.00      1.00      1.00         1
      ('Ghost', 'Dark')       1.00      1.00      1.00         1
     ('Ghost', 'Fairy')       1.00      1.00      1.00         1
      ('Ghost', 'Fire')       1.00      1.00      1.00         1
    ('Ghost', 'Flying')       0.00      0.00      0.00         1
     ('Ghost', 'Grass')       0.67      1.00      0.80         2
      ('Ghost', 'None')       0.25      0.33      0.29         3
    ('Ghost', 'Poison')       1.00      1.00      1.00         1
      ('Grass', 'Dark')       0.00      0.00      0.00         1
    ('Grass', 'Dragon')       0.00      0.00      0.00         1
     ('Grass', 'Fairy')       0.00      0.00      0.00         1
  ('Grass', 'Fighting')       0.00      0.00      0.00         1
    ('Grass', 'Flying')       1.00      0.50      0.67         2
     ('Grass', 'Ghost')       1.00      1.00      1.00         1
    ('Grass', 'Ground')       1.00      1.00      1.00         1
       ('Grass', 'Ice')       1.00      1.00      1.00         1
      ('Grass', 'None')       0.58      0.78      0.67         9
    ('Grass', 'Poison')       0.75      1.00      0.86         3
     ('Grass', 'Steel')       0.00      0.00      0.00         1
     ('Ground', 'Dark')       0.00      0.00      0.00         1
 ('Ground', 'Electric')       1.00      1.00      1.00         1
     ('Ground', 'Fire')       1.00      1.00      1.00         1
   ('Ground', 'Flying')       0.00      0.00      0.00         1
    ('Ground', 'Ghost')       0.00      0.00      0.00         1
     ('Ground', 'None')       0.00      0.00      0.00         4
     ('Ground', 'Rock')       0.00      0.00      0.00         1
    ('Ground', 'Steel')       1.00      1.00      1.00         1
         ('Ice', 'Bug')       0.00      0.00      0.00         0
       ('Ice', 'Fairy')       1.00      1.00      1.00         1
        ('Ice', 'Fire')       1.00      1.00      1.00         1
      ('Ice', 'Flying')       0.00      0.00      0.00         0
       ('Ice', 'Ghost')       1.00      1.00      1.00         1
      ('Ice', 'Ground')       0.00      0.00      0.00         1
        ('Ice', 'None')       0.50      0.50      0.50         4
       ('Ice', 'Water')       1.00      1.00      1.00         1
   ('Normal', 'Dragon')       1.00      1.00      1.00         1
    ('Normal', 'Fairy')       0.00      0.00      0.00         1
 ('Normal', 'Fighting')       0.00      0.00      0.00         1
   ('Normal', 'Flying')       0.83      0.83      0.83         6
    ('Normal', 'Grass')       0.00      0.00      0.00         0
   ('Normal', 'Ground')       1.00      1.00      1.00         1
     ('Normal', 'None')       0.38      0.43      0.40        14
  ('Normal', 'Psychic')       0.00      0.00      0.00         1
    ('Normal', 'Water')       1.00      1.00      1.00         1
      ('Poison', 'Bug')       1.00      1.00      1.00         1
     ('Poison', 'Dark')       0.00      0.00      0.00         1
   ('Poison', 'Dragon')       0.00      0.00      0.00         1
    ('Poison', 'Fairy')       0.50      1.00      0.67         1
   ('Poison', 'Flying')       1.00      1.00      1.00         1
   ('Poison', 'Ground')       0.00      0.00      0.00         0
     ('Poison', 'None')       1.00      0.33      0.50         3
    ('Poison', 'Water')       0.00      0.00      0.00         1
    ('Psychic', 'Dark')       1.00      1.00      1.00         1
  ('Psychic', 'Dragon')       1.00      1.00      1.00         1
   ('Psychic', 'Fairy')       0.00      0.00      0.00         2
('Psychic', 'Fighting')       0.00      0.00      0.00         1
    ('Psychic', 'Fire')       1.00      1.00      1.00         1
  ('Psychic', 'Flying')       0.33      0.50      0.40         2
   ('Psychic', 'Ghost')       1.00      1.00      1.00         1
   ('Psychic', 'Grass')       0.00      0.00      0.00         0
     ('Psychic', 'Ice')       0.00      0.00      0.00         1
    ('Psychic', 'None')       0.57      0.44      0.50         9
   ('Rock', 'Electric')       1.00      1.00      1.00         1
      ('Rock', 'Fairy')       0.00      0.00      0.00         1
   ('Rock', 'Fighting')       1.00      1.00      1.00         1
     ('Rock', 'Flying')       1.00      1.00      1.00         1
     ('Rock', 'Ground')       0.00      0.00      0.00         1
       ('Rock', 'None')       0.50      0.67      0.57         3
     ('Rock', 'Poison')       0.50      1.00      0.67         1
      ('Rock', 'Steel')       0.00      0.00      0.00         1
      ('Rock', 'Water')       1.00      1.00      1.00         1
     ('Steel', 'Fairy')       0.00      0.00      0.00         1
  ('Steel', 'Fighting')       1.00      1.00      1.00         1
     ('Steel', 'Ghost')       0.00      0.00      0.00         1
      ('Steel', 'None')       0.00      0.00      0.00         2
   ('Steel', 'Psychic')       1.00      0.50      0.67         2
      ('Steel', 'Rock')       0.00      0.00      0.00         1
      ('Water', 'Dark')       0.00      0.00      0.00         2
    ('Water', 'Dragon')       0.00      0.00      0.00         1
     ('Water', 'Fairy')       1.00      1.00      1.00         1
  ('Water', 'Fighting')       1.00      1.00      1.00         1
    ('Water', 'Flying')       0.67      1.00      0.80         2
     ('Water', 'Grass')       1.00      1.00      1.00         1
    ('Water', 'Ground')       1.00      0.50      0.67         2
       ('Water', 'Ice')       0.00      0.00      0.00         1
      ('Water', 'None')       0.60      0.60      0.60        15
    ('Water', 'Poison')       0.00      0.00      0.00         1
   ('Water', 'Psychic')       0.00      0.00      0.00         1
      ('Water', 'Rock')       0.00      0.00      0.00         1
     ('Water', 'Steel')       1.00      1.00      1.00         1

               accuracy                           0.48       242
              macro avg       0.44      0.46      0.44       242
           weighted avg       0.48      0.48      0.47       242

C:\Users\thors\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\LocalCache\local-packages\Python310\site-packages\sklearn\metrics\_classification.py:1497: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
C:\Users\thors\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\LocalCache\local-packages\Python310\site-packages\sklearn\metrics\_classification.py:1497: UndefinedMetricWarning: Recall is ill-defined and being set to 0.0 in labels with no true samples. Use `zero_division` parameter to control this behavior.
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
C:\Users\thors\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\LocalCache\local-packages\Python310\site-packages\sklearn\metrics\_classification.py:1497: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
C:\Users\thors\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\LocalCache\local-packages\Python310\site-packages\sklearn\metrics\_classification.py:1497: UndefinedMetricWarning: Recall is ill-defined and being set to 0.0 in labels with no true samples. Use `zero_division` parameter to control this behavior.
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
C:\Users\thors\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\LocalCache\local-packages\Python310\site-packages\sklearn\metrics\_classification.py:1497: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
C:\Users\thors\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\LocalCache\local-packages\Python310\site-packages\sklearn\metrics\_classification.py:1497: UndefinedMetricWarning: Recall is ill-defined and being set to 0.0 in labels with no true samples. Use `zero_division` parameter to control this behavior.
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))

# Import GridSearchCV
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
# Setup the parameters and distributions to sample from: param_dist
pipeline = make_pipeline(StandardScaler(), DecisionTreeClassifier())

param_dist = {
    "decisiontreeclassifier__max_depth": [15, 30, None],  
    "decisiontreeclassifier__min_samples_leaf": np.arange(1, 10)
}

# Instantiate the GridSearchCV object
grid_search_cv = GridSearchCV(pipeline, param_grid=param_dist, cv=5)

# Fit grid_search_cv using the data X and labels y.
grid_search_cv.fit(X_train, y_train) 
y_pred = grid_search_cv.predict(X_test)

# Print the best score
print("Tuned Model Parameters: {}".format(grid_search_cv.best_params_))
print("Accuracy: {}".format(grid_search_cv.best_estimator_.score(X_test, y_test)))

C:\Users\thors\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\LocalCache\local-packages\Python310\site-packages\sklearn\model_selection\_split.py:737: UserWarning: The least populated class in y has only 1 members, which is less than n_splits=5.
  warnings.warn(

Tuned Model Parameters: {'decisiontreeclassifier__max_depth': None, 'decisiontreeclassifier__min_samples_leaf': 1}
Accuracy: 0.49173553719008267

from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import RandomizedSearchCV
model = RandomForestClassifier(random_state=42)

model.fit(X_train, y_train)

# Predict labels for the test set
y_pred = model.predict(X_test)
score = model.score(X_test, y_test)
# Calculate accuracy
print("Score :", score )
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

Score : 0.6487603305785123
Accuracy: 0.6487603305785123

# Import GridSearchCV
pipeline = make_pipeline(StandardScaler(), RandomForestClassifier())

# Setup the parameters and distributions to sample from: param_dist
param_dist = {
    "randomforestclassifier__max_depth": np.arange(5, 25),  
    "randomforestclassifier__min_samples_leaf": np.arange(1, 10),
    "randomforestclassifier__n_estimators": np.arange(60, 140, 5)
}

# Instantiate the RandomizedSearchCV object: random_grid_search_cv
random_search_cv = RandomizedSearchCV(pipeline, param_distributions=param_dist, n_iter=50, cv=3, random_state=42)
#grid_search_cv = GridSearchCV(pipeline, param_grid=param_dist, cv=3)

# Fit random_search_cv using the data X and labels y
random_search_cv.fit(X_train, y_train)
#grid_search_cv.fit(X_train, y_train)

# Print the best score
print("Best score is {}".format(random_search_cv.best_estimator_.score(X_test, y_test)))
print("Best parameters are {}".format(random_search_cv.best_params_))

c:\Users\Chau\miniconda3\Lib\site-packages\sklearn\model_selection\_split.py:737: UserWarning: The least populated class in y has only 1 members, which is less than n_splits=3.
  warnings.warn(

Best score is 0.6900826446280992
Best parameters are {'randomforestclassifier__max_depth': None, 'randomforestclassifier__min_samples_leaf': 1, 'randomforestclassifier__n_estimators': 136}

from sklearn.svm import SVC

model = SVC(kernel='rbf', random_state=42)

model.fit(X_train, y_train)
y_pred = model.predict(X_test)

# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

Accuracy: 0.06198347107438017

from sklearn.svm import SVC

# Define the pipeline with StandardScaler and SVC
pipeline = make_pipeline(StandardScaler(), SVC(kernel='rbf'))

# Define the parameter grid
param_grid = {
    'svc__C': [0.1,0.5, 1,5, 10],        # Regularization parameter
    'svc__coef0': [0.0, 1.0, 2.0], # Independent term in the polynomial kernel function
}

# Initialize GridSearchCV
grid_search = GridSearchCV(pipeline, param_grid, cv=5, n_jobs=-1)

# Perform grid search
grid_search.fit(X_train, y_train)

# Get the best parameters and score
best_params = grid_search.best_params_
best_score=grid_search.best_estimator_.score(X_test, y_test)

print("Best Parameters:", best_params)
print("Best score is {}".format(grid_search.best_estimator_.score(X_test, y_test)))

c:\Users\Chau\miniconda3\Lib\site-packages\sklearn\model_selection\_split.py:737: UserWarning: The least populated class in y has only 1 members, which is less than n_splits=5.
  warnings.warn(

Best Parameters: {'svc__C': 10, 'svc__coef0': 0.0}
Best score is 0.6074380165289256

svm_classifier = SVC(kernel='linear', random_state=42)

# Train the SVM classifier
svm_classifier.fit(X_train, y_train)
y_pred = svm_classifier.predict(X_test)

# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

Accuracy: 0.3677685950413223

from sklearn.svm import SVC

# Define the pipeline with StandardScaler and SVC
pipeline = make_pipeline(StandardScaler(), SVC(kernel='linear', random_state=42))

# Define the parameter grid
param_grid = {
    'svc__C': [0.1,0.5, 1 , 5, 10],        # Regularization parameter    # Degree of the polynomial kernel
    'svc__coef0': [0.0, 1.0, 2.0], # Independent term in the polynomial kernel function
}

# Initialize GridSearchCV
grid_search = GridSearchCV(pipeline, param_grid, cv=5, n_jobs=-1)

# Perform grid search
grid_search.fit(X_train, y_train)

# Get the best parameters and score
best_params = grid_search.best_params_
best_score=grid_search.best_estimator_.score(X_test, y_test)

print("Best Parameters:", best_params)
print("Best Score:", best_score)

c:\Users\Chau\miniconda3\Lib\site-packages\sklearn\model_selection\_split.py:737: UserWarning: The least populated class in y has only 1 members, which is less than n_splits=5.
  warnings.warn(

Best Parameters: {'svc__C': 0.5, 'svc__coef0': 0.0}
Best Score: 0.6528925619834711

model = SVC(kernel='poly', random_state=42)

# Train the SVM classifier
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

Accuracy: 0.0743801652892562

from sklearn.svm import SVC

# Define the pipeline with StandardScaler and SVC
pipeline = make_pipeline(StandardScaler(), SVC(kernel='poly'))

# Define the parameter grid
param_grid = {
    'svc__C': [0.1,0.5, 1, 5, 10],        # Regularization parameter
    'svc__degree': [2, 3, 4, 5, 6],      # Degree of the polynomial kernel
    'svc__coef0': [0.0, 1.0, 2.0], # Independent term in the polynomial kernel function
}

# Initialize GridSearchCV
grid_search = GridSearchCV(pipeline, param_grid, cv=5, n_jobs=-1)

# Perform grid search
grid_search.fit(X_train, y_train)

# Get the best parameters and score
best_params = grid_search.best_params_
best_score=grid_search.best_estimator_.score(X_test, y_test)

print("Best Parameters:", best_params)
print("Best Score:", best_score)

c:\Users\Chau\miniconda3\Lib\site-packages\sklearn\model_selection\_split.py:737: UserWarning: The least populated class in y has only 1 members, which is less than n_splits=5.
  warnings.warn(

Best Parameters: {'svc__C': 10, 'svc__coef0': 2.0, 'svc__degree': 2}
Best Score: 0.6198347107438017

model = SVC(kernel='sigmoid', random_state=42)

# Train the SVM classifier
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

# Calculate accuracy
print("Score: ", model.score(X_test, y_test))

accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

Score:  0.05785123966942149
Accuracy: 0.05785123966942149

from sklearn.svm import SVC

# Define the pipeline with StandardScaler and SVC
pipeline = make_pipeline(StandardScaler(), SVC(kernel='sigmoid'))

# Define the parameter grid
param_grid = {
    'svc__C': [0.1,0.5, 1,5, 10],        # Regularization parameter
    'svc__coef0': [0.0, 1.0, 2.0], # Independent term in the polynomial kernel function
}

# Initialize GridSearchCV
grid_search = GridSearchCV(pipeline, param_grid, cv=5, n_jobs=-1)

# Perform grid search
grid_search.fit(X_train, y_train)

# Get the best parameters and score
best_params = grid_search.best_params_
best_score=grid_search.best_estimator_.score(X_test, y_test)

print("Best Parameters:", best_params)
print("Best Score:", best_score)

c:\Users\Chau\miniconda3\Lib\site-packages\sklearn\model_selection\_split.py:737: UserWarning: The least populated class in y has only 1 members, which is less than n_splits=5.
  warnings.warn(

Best Parameters: {'svc__C': 10, 'svc__coef0': 0.0}
Best Score: 0.6033057851239669

from sklearn.neighbors import KNeighborsClassifier
model = KNeighborsClassifier()
model.fit(X_train, y_train)

y_pred = model.predict(X_test)

# Calculate accuracy
print("Score: ",model.score(X_test, y_test))

accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

Score:  0.06611570247933884
Accuracy: 0.06611570247933884

from sklearn.neighbors import KNeighborsClassifier
param_grid = {
    'kneighborsclassifier__n_neighbors': [3, 5, 7, 9]  # List of k values to try
}

pipeline = make_pipeline(StandardScaler(), KNeighborsClassifier())

grid_search = GridSearchCV(estimator=pipeline, param_grid=param_grid, cv=5)

grid_search.fit(X_train, y_train)

best_params = grid_search.best_params_
best_score=grid_search.best_estimator_.score(X_test, y_test)

print("Best Parameters:", best_params)
print("Best Score:", best_score)

c:\Users\Chau\miniconda3\Lib\site-packages\sklearn\model_selection\_split.py:737: UserWarning: The least populated class in y has only 1 members, which is less than n_splits=5.
  warnings.warn(

Best Parameters: {'kneighborsclassifier__n_neighbors': 3}
Best Score: 0.2809917355371901

from sklearn.linear_model import LogisticRegression
model = LogisticRegression()
model.fit(X_train, y_train)

y_pred = model.predict(X_test)

# Calculate accuracy
print("Score: ", model.score(X_test, y_test))

accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

Score:  0.1446280991735537
Accuracy: 0.1446280991735537

c:\Users\Chau\miniconda3\Lib\site-packages\sklearn\linear_model\_logistic.py:469: ConvergenceWarning: lbfgs failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(

model = LogisticRegression( random_state=42, multi_class='auto', solver='liblinear', max_iter=1000) 
model.fit(X_train, y_train)

y_pred = model.predict(X_test)

# Calculate accuracy
print("Score: ", model.score(X_test, y_test))

accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

Score:  0.5950413223140496
Accuracy: 0.5950413223140496

param_grid = {
    'logisticregression__C': np.logspace(-5, 5, 5),
    'logisticregression__penalty': ['l1', 'l2']
}

pipeline = make_pipeline(StandardScaler(), LogisticRegression(solver='liblinear'))

grid_search = GridSearchCV(estimator=pipeline, param_grid=param_grid, cv=2)

grid_search.fit(X_train, y_train)

best_params = grid_search.best_params_
best_score=grid_search.best_estimator_.score(X_test, y_test)

print("Best Parameters:", best_params)
print("Best Score:", best_score)

c:\Users\Chau\miniconda3\Lib\site-packages\sklearn\model_selection\_split.py:737: UserWarning: The least populated class in y has only 1 members, which is less than n_splits=2.
  warnings.warn(
c:\Users\Chau\miniconda3\Lib\site-packages\sklearn\svm\_base.py:1237: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
  warnings.warn(
c:\Users\Chau\miniconda3\Lib\site-packages\sklearn\svm\_base.py:1237: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
  warnings.warn(
c:\Users\Chau\miniconda3\Lib\site-packages\sklearn\svm\_base.py:1237: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
  warnings.warn(
c:\Users\Chau\miniconda3\Lib\site-packages\sklearn\svm\_base.py:1237: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
  warnings.warn(
c:\Users\Chau\miniconda3\Lib\site-packages\sklearn\svm\_base.py:1237: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
  warnings.warn(

Best Parameters: {'logisticregression__C': 1.0, 'logisticregression__penalty': 'l2'}
Best Score: 0.6776859504132231

from sklearn.linear_model import LogisticRegression
model = LogisticRegression( penalty='elasticnet',l1_ratio=0.5, random_state=42, multi_class='auto', solver='saga', max_iter=100) 
model.fit(X_train, y_train)

y_pred = model.predict(X_test)

# Calculate accuracy
print("Score: ", model.score(X_test, y_test))

accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

Score:  0.1115702479338843
Accuracy: 0.1115702479338843

c:\Users\Chau\miniconda3\Lib\site-packages\sklearn\linear_model\_sag.py:350: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge
  warnings.warn(

param_grid = {
    'logisticregression__C': np.logspace(-3, 3, 3),
    'logisticregression__l1_ratio': np.linspace(0, 1, 5)
}

pipeline = make_pipeline(StandardScaler(), LogisticRegression(penalty='elasticnet', solver='saga'))

grid_search = GridSearchCV(estimator=pipeline, param_grid=param_grid, cv=2)

grid_search.fit(X_train, y_train)

best_params = grid_search.best_params_
best_score=grid_search.best_estimator_.score(X_test, y_test)

print("Best Parameters:", best_params)
print("Best Score:", best_score)

c:\Users\Chau\miniconda3\Lib\site-packages\sklearn\model_selection\_split.py:737: UserWarning: The least populated class in y has only 1 members, which is less than n_splits=2.
  warnings.warn(
c:\Users\Chau\miniconda3\Lib\site-packages\sklearn\linear_model\_sag.py:350: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge
  warnings.warn(
c:\Users\Chau\miniconda3\Lib\site-packages\sklearn\linear_model\_sag.py:350: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge
  warnings.warn(
c:\Users\Chau\miniconda3\Lib\site-packages\sklearn\linear_model\_sag.py:350: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge
  warnings.warn(
c:\Users\Chau\miniconda3\Lib\site-packages\sklearn\linear_model\_sag.py:350: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge
  warnings.warn(
c:\Users\Chau\miniconda3\Lib\site-packages\sklearn\linear_model\_sag.py:350: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge
  warnings.warn(
c:\Users\Chau\miniconda3\Lib\site-packages\sklearn\linear_model\_sag.py:350: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge
  warnings.warn(
c:\Users\Chau\miniconda3\Lib\site-packages\sklearn\linear_model\_sag.py:350: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge
  warnings.warn(
c:\Users\Chau\miniconda3\Lib\site-packages\sklearn\linear_model\_sag.py:350: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge
  warnings.warn(
c:\Users\Chau\miniconda3\Lib\site-packages\sklearn\linear_model\_sag.py:350: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge
  warnings.warn(
c:\Users\Chau\miniconda3\Lib\site-packages\sklearn\linear_model\_sag.py:350: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge
  warnings.warn(
c:\Users\Chau\miniconda3\Lib\site-packages\sklearn\linear_model\_sag.py:350: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge
  warnings.warn(
c:\Users\Chau\miniconda3\Lib\site-packages\sklearn\linear_model\_sag.py:350: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge
  warnings.warn(
c:\Users\Chau\miniconda3\Lib\site-packages\sklearn\linear_model\_sag.py:350: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge
  warnings.warn(
c:\Users\Chau\miniconda3\Lib\site-packages\sklearn\linear_model\_sag.py:350: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge
  warnings.warn(
c:\Users\Chau\miniconda3\Lib\site-packages\sklearn\linear_model\_sag.py:350: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge
  warnings.warn(
c:\Users\Chau\miniconda3\Lib\site-packages\sklearn\linear_model\_sag.py:350: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge
  warnings.warn(
c:\Users\Chau\miniconda3\Lib\site-packages\sklearn\linear_model\_sag.py:350: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge
  warnings.warn(
c:\Users\Chau\miniconda3\Lib\site-packages\sklearn\linear_model\_sag.py:350: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge
  warnings.warn(
c:\Users\Chau\miniconda3\Lib\site-packages\sklearn\linear_model\_sag.py:350: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge
  warnings.warn(
c:\Users\Chau\miniconda3\Lib\site-packages\sklearn\linear_model\_sag.py:350: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge
  warnings.warn(
c:\Users\Chau\miniconda3\Lib\site-packages\sklearn\linear_model\_sag.py:350: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge
  warnings.warn(
c:\Users\Chau\miniconda3\Lib\site-packages\sklearn\linear_model\_sag.py:350: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge
  warnings.warn(

Best Parameters: {'logisticregression__C': 1.0, 'logisticregression__l1_ratio': 0.25}
Best Score: 0.6611570247933884

c:\Users\Chau\miniconda3\Lib\site-packages\sklearn\linear_model\_sag.py:350: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge
  warnings.warn(

df = preprocessed_df.copy()

df['Types'] = df[['type_1', 'type_2']].apply(lambda x: sorted(tuple(filter(lambda y: pd.notna(y), x))), axis=1)

df.Types = df.Types.astype(str)

# drop the Type 1 and Type 2 columns
df.drop(['type_1', 'type_2'], axis=1, inplace=True)

# print head
df.head()

sns.countplot(df, y='Types');

['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b', '#e377c2', '#7f7f7f', '#bcbd22', '#17becf']
['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b', '#e377c2', '#7f7f7f', '#bcbd22', '#17becf']
['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b', '#e377c2', '#7f7f7f', '#bcbd22', '#17becf']
['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b', '#e377c2', '#7f7f7f', '#bcbd22', '#17becf']
['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b', '#e377c2', '#7f7f7f', '#bcbd22', '#17becf']

# Check if there are any pokemon with only one type
singleton_classes = df['Types'].value_counts()[df['Types'].value_counts() == 1].index.tolist()
singleton_data = df[df['Types'].isin(singleton_classes)]
other_data = df[~df['Types'].isin(singleton_classes)]


print("Number of singleton classes",len(singleton_classes))
print("number of unique type combinations",len(df['Types'].unique()))
print(len(df['Types']))
df.head()

Number of singleton classes 23
number of unique type combinations 154
1044

# Split the data into training and testing sets
X = df.drop(columns=['Types'])
y = df['Types']

X_train, X_test, y_train, y_test = train_test_split(other_data.drop(columns=['Types']), other_data['Types'], test_size=0.2, stratify=other_data['Types'], random_state=42)
X_train = pd.concat([X_train, singleton_data.drop(columns=['Types'])])
y_train = pd.concat([y_train, singleton_data['Types']])
X_test = pd.concat([X_test, singleton_data.drop(columns=['Types'])])
y_test = pd.concat([y_test, singleton_data['Types']])

# Initialize and train the decision tree classifier
model = DecisionTreeClassifier(random_state=42)
model.fit(X_train, y_train)

# Predict labels for the test set
y_pred = model.predict(X_test)

# Calculate accuracy
print("Score: ", model.score(X_test, y_test))

accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

Score:  0.4298245614035088
Accuracy: 0.4298245614035088

# Setup the parameters and distributions to sample from: param_dist
pipeline = make_pipeline(StandardScaler(), DecisionTreeClassifier())

param_dist = {
    "decisiontreeclassifier__max_depth": [15, 30, None],  
    "decisiontreeclassifier__min_samples_leaf": np.arange(1, 10)
}

# Instantiate the GridSearchCV object
grid_search_cv = GridSearchCV(pipeline, param_grid=param_dist, cv=5)

# Fit grid_search_cv using the data X and labels y.
grid_search_cv.fit(X_train, y_train) 
y_pred = grid_search_cv.predict(X_test)

# Print the best score
print("Tuned Model Parameters: {}".format(grid_search_cv.best_params_))
print("Accuracy: {}".format(grid_search_cv.best_estimator_.score(X_test, y_test)))

c:\Users\Chau\miniconda3\Lib\site-packages\sklearn\model_selection\_split.py:737: UserWarning: The least populated class in y has only 1 members, which is less than n_splits=5.
  warnings.warn(

Tuned Model Parameters: {'decisiontreeclassifier__max_depth': None, 'decisiontreeclassifier__min_samples_leaf': 1}
Accuracy: 0.4649122807017544

from sklearn.ensemble import RandomForestClassifier

model = RandomForestClassifier(random_state=42)

model.fit(X_train, y_train)

# Predict labels for the test set
y_pred = model.predict(X_test)

# Calculate accuracy
print("Score: ", model.score(X_test, y_test))

accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

Score:  0.5964912280701754
Accuracy: 0.5964912280701754

pipeline = make_pipeline(StandardScaler(), RandomForestClassifier())

# Setup the parameters and distributions to sample from: param_dist
param_dist = {
    "randomforestclassifier__max_depth": np.arange(30, 36),  
    "randomforestclassifier__min_samples_leaf": np.arange(1, 10, 4),
    "randomforestclassifier__n_estimators": np.arange(100, 140, 4)
}

# Instantiate the RandomizedSearchCV object: random_grid_search_cv
random_search_cv = RandomizedSearchCV(pipeline, param_distributions=param_dist, n_iter=50, cv=3, random_state=42)
#grid_search_cv = GridSearchCV(pipeline, param_grid=param_dist, cv=3)

# Fit random_search_cv using the data X and labels y
random_search_cv.fit(X_train, y_train)
#grid_search_cv.fit(X_train, y_train)

# Print the best score
print("Best score is {}".format(random_search_cv.best_estimator_.score(X_test, y_test)))
print("Best parameters are {}".format(random_search_cv.best_params_))

c:\Users\Chau\miniconda3\Lib\site-packages\sklearn\model_selection\_split.py:737: UserWarning: The least populated class in y has only 1 members, which is less than n_splits=3.
  warnings.warn(

Best score is 0.5964912280701754
Best parameters are {'randomforestclassifier__max_depth': 33, 'randomforestclassifier__min_samples_leaf': 1, 'randomforestclassifier__n_estimators': 136}

from sklearn.svm import SVC

model = SVC(kernel='rbf', random_state=42)

model.fit(X_train, y_train)
y_pred = model.predict(X_test)

# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

Accuracy: 0.05263157894736842

from sklearn.svm import SVC

# Define the pipeline with StandardScaler and SVC
pipeline = make_pipeline(StandardScaler(), SVC(kernel='rbf'))

# Define the parameter grid
param_grid = {
    'svc__C': [0.1,0.5, 1,5, 10],        # Regularization parameter
    'svc__coef0': [0.0, 1.0, 2.0], # Independent term in the polynomial kernel function
}

# Initialize GridSearchCV
grid_search = GridSearchCV(pipeline, param_grid, cv=5, n_jobs=-1)

# Perform grid search
grid_search.fit(X_train, y_train)

# Get the best parameters and score
best_params = grid_search.best_params_
best_score = grid_search.best_estimator_.score(X_test, y_test)

print("Best Parameters:", best_params)
print("Best Score:", best_score)

c:\Users\Chau\miniconda3\Lib\site-packages\sklearn\model_selection\_split.py:737: UserWarning: The least populated class in y has only 1 members, which is less than n_splits=5.
  warnings.warn(

Best Parameters: {'svc__C': 10, 'svc__coef0': 0.0}
Best Score: 0.5482456140350878

svm_classifier = SVC(kernel='linear', random_state=42)

# Train the SVM classifier
svm_classifier.fit(X_train, y_train)
y_pred = svm_classifier.predict(X_test)

# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

Accuracy: 0.34210526315789475

from sklearn.svm import SVC

# Define the pipeline with StandardScaler and SVC
pipeline = make_pipeline(StandardScaler(), SVC(kernel='linear'))

# Define the parameter grid
param_grid = {
    'svc__C': [0.1,0.5, 1 , 5, 10],        # Regularization parameter    # Degree of the polynomial kernel
    'svc__coef0': [0.0, 1.0, 2.0], # Independent term in the polynomial kernel function
}

# Initialize GridSearchCV
grid_search = GridSearchCV(pipeline, param_grid, cv=5, n_jobs=-1)

# Perform grid search
grid_search.fit(X_train, y_train)

# Get the best parameters and score
best_params = grid_search.best_params_
best_score = grid_search.best_estimator_.score(X_test, y_test)

print("Best Parameters:", best_params)
print("Best Score:", best_score)

c:\Users\Chau\miniconda3\Lib\site-packages\sklearn\model_selection\_split.py:737: UserWarning: The least populated class in y has only 1 members, which is less than n_splits=5.
  warnings.warn(

Best Parameters: {'svc__C': 0.5, 'svc__coef0': 0.0}
Best Score: 0.5921052631578947

model = SVC(kernel='poly', random_state=42)

# Train the SVM classifier
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

Accuracy: 0.07017543859649122

from sklearn.svm import SVC

# Define the pipeline with StandardScaler and SVC
pipeline = make_pipeline(StandardScaler(), SVC(kernel='poly'))

# Define the parameter grid
param_grid = {
    'svc__C': [0.1,0.5, 1, 5, 10],        # Regularization parameter
    'svc__degree': [2, 3, 4, 5, 6],      # Degree of the polynomial kernel
    'svc__coef0': [0.0, 1.0, 2.0], # Independent term in the polynomial kernel function
}

# Initialize GridSearchCV
grid_search = GridSearchCV(pipeline, param_grid, cv=5, n_jobs=-1)

# Perform grid search
grid_search.fit(X_train, y_train)

# Get the best parameters and score
best_params = grid_search.best_params_
best_score = grid_search.best_estimator_.score(X_test, y_test)

print("Best Parameters:", best_params)
print("Best Score:", best_score)

c:\Users\Chau\miniconda3\Lib\site-packages\sklearn\model_selection\_split.py:737: UserWarning: The least populated class in y has only 1 members, which is less than n_splits=5.
  warnings.warn(

Best Parameters: {'svc__C': 10, 'svc__coef0': 2.0, 'svc__degree': 3}
Best Score: 0.5789473684210527

model = SVC(kernel='sigmoid', random_state=42)

# Train the SVM classifier
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

# Calculate accuracy
print("Score: ", model.score(X_test, y_test))

accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

Score:  0.07456140350877193
Accuracy: 0.07456140350877193

from sklearn.svm import SVC

# Define the pipeline with StandardScaler and SVC
pipeline = make_pipeline(StandardScaler(), SVC(kernel='sigmoid'))

# Define the parameter grid
param_grid = {
    'svc__C': [0.1,0.5, 1,5, 10, 50],        # Regularization parameter
    'svc__coef0': [0.0, 1.0, 2.0], # Independent term in the polynomial kernel function
}

# Initialize GridSearchCV
grid_search = GridSearchCV(pipeline, param_grid, cv=5, n_jobs=-1)

# Perform grid search
grid_search.fit(X_train, y_train)

# Get the best parameters and score
best_params = grid_search.best_params_
best_score = grid_search.best_estimator_.score(X_test, y_test)

print("Best Parameters:", best_params)
print("Best Score:", best_score)

c:\Users\Chau\miniconda3\Lib\site-packages\sklearn\model_selection\_split.py:737: UserWarning: The least populated class in y has only 1 members, which is less than n_splits=5.
  warnings.warn(

Best Parameters: {'svc__C': 50, 'svc__coef0': 0.0}
Best Score: 0.5614035087719298

from sklearn.neighbors import KNeighborsClassifier
model = KNeighborsClassifier()
model.fit(X_train, y_train)

y_pred = model.predict(X_test)

# Calculate accuracy
print("Score: ", model.score(X_test, y_test))

accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

Score:  0.10087719298245613
Accuracy: 0.10087719298245613

param_grid = {
    'kneighborsclassifier__n_neighbors': [3, 5, 7, 9]  # List of k values to try
}

pipeline = make_pipeline(StandardScaler(), KNeighborsClassifier())

grid_search = GridSearchCV(estimator=pipeline, param_grid=param_grid, cv=5)

grid_search.fit(X_train, y_train)

best_params = grid_search.best_params_
best_score=grid_search.best_estimator_.score(X_test, y_test)

print("Best Parameters:", best_params)
print("Best Score:", best_score)

c:\Users\Chau\miniconda3\Lib\site-packages\sklearn\model_selection\_split.py:737: UserWarning: The least populated class in y has only 1 members, which is less than n_splits=5.
  warnings.warn(

Best Parameters: {'kneighborsclassifier__n_neighbors': 1}
Best Score: 0.5921052631578947

model = LogisticRegression()
model.fit(X_train, y_train)

y_pred = model.predict(X_test)

# Calculate accuracy
print("Score: ", model.score(X_test, y_test))

accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

Score:  0.14473684210526316
Accuracy: 0.14473684210526316

c:\Users\Chau\miniconda3\Lib\site-packages\sklearn\linear_model\_logistic.py:469: ConvergenceWarning: lbfgs failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(

model = LogisticRegression( random_state=42, multi_class='auto', solver='liblinear', max_iter=1000) 
model.fit(X_train, y_train)

y_pred = model.predict(X_test)

# Calculate accuracy
print("Score: ", model.score(X_test, y_test))

accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

Score:  0.5657894736842105
Accuracy: 0.5657894736842105

param_grid = {
    'logisticregression__C': np.logspace(-4, 4, 3),
    'logisticregression__penalty': ['l1', 'l2']
}

pipeline = make_pipeline(StandardScaler(), LogisticRegression(solver='liblinear'))

grid_search = GridSearchCV(estimator=pipeline, param_grid=param_grid, cv=2)

grid_search.fit(X_train, y_train)

best_params = grid_search.best_params_
best_score=grid_search.best_estimator_.score(X_test, y_test)

print("Best Parameters:", best_params)
print("Best Score:", best_score)

c:\Users\Chau\miniconda3\Lib\site-packages\sklearn\model_selection\_split.py:737: UserWarning: The least populated class in y has only 1 members, which is less than n_splits=2.
  warnings.warn(
c:\Users\Chau\miniconda3\Lib\site-packages\sklearn\svm\_base.py:1237: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
  warnings.warn(
c:\Users\Chau\miniconda3\Lib\site-packages\sklearn\svm\_base.py:1237: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
  warnings.warn(

Best Parameters: {'logisticregression__C': 1.0, 'logisticregression__penalty': 'l2'}
Best Score: 0.6776859504132231

from sklearn.linear_model import LogisticRegression
model = LogisticRegression( penalty='elasticnet',l1_ratio=0.5, random_state=42, multi_class='auto', solver='saga', max_iter=100) 
model.fit(X_train, y_train)

y_pred = model.predict(X_test)

# Calculate accuracy
print("Score: ", model.score(X_test, y_test))

accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

Score:  0.1115702479338843
Accuracy: 0.1115702479338843

c:\Users\Chau\miniconda3\Lib\site-packages\sklearn\linear_model\_sag.py:350: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge
  warnings.warn(

param_grid = {
    'logisticregression__C': [1.0],
    'logisticregression__l1_ratio': np.linspace(0, 1, 5)
}

pipeline = make_pipeline(StandardScaler(), LogisticRegression(penalty='elasticnet', solver='saga'))

grid_search = GridSearchCV(estimator=pipeline, param_grid=param_grid, cv=2)

grid_search.fit(X_train, y_train)

best_params = grid_search.best_params_
best_score=grid_search.best_estimator_.score(X_test, y_test)

print("Best Parameters:", best_params)
print("Best Score:", best_score)

c:\Users\Chau\miniconda3\Lib\site-packages\sklearn\model_selection\_split.py:737: UserWarning: The least populated class in y has only 1 members, which is less than n_splits=2.
  warnings.warn(
c:\Users\Chau\miniconda3\Lib\site-packages\sklearn\linear_model\_sag.py:350: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge
  warnings.warn(
c:\Users\Chau\miniconda3\Lib\site-packages\sklearn\linear_model\_sag.py:350: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge
  warnings.warn(
c:\Users\Chau\miniconda3\Lib\site-packages\sklearn\linear_model\_sag.py:350: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge
  warnings.warn(
c:\Users\Chau\miniconda3\Lib\site-packages\sklearn\linear_model\_sag.py:350: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge
  warnings.warn(
c:\Users\Chau\miniconda3\Lib\site-packages\sklearn\linear_model\_sag.py:350: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge
  warnings.warn(
c:\Users\Chau\miniconda3\Lib\site-packages\sklearn\linear_model\_sag.py:350: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge
  warnings.warn(
c:\Users\Chau\miniconda3\Lib\site-packages\sklearn\linear_model\_sag.py:350: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge
  warnings.warn(
c:\Users\Chau\miniconda3\Lib\site-packages\sklearn\linear_model\_sag.py:350: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge
  warnings.warn(
c:\Users\Chau\miniconda3\Lib\site-packages\sklearn\linear_model\_sag.py:350: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge
  warnings.warn(
c:\Users\Chau\miniconda3\Lib\site-packages\sklearn\linear_model\_sag.py:350: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge
  warnings.warn(

Best Parameters: {'logisticregression__C': 1.0, 'logisticregression__l1_ratio': 0.25}
Best Score: 0.6611570247933884

c:\Users\Chau\miniconda3\Lib\site-packages\sklearn\linear_model\_sag.py:350: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge
  warnings.warn(

df = preprocessed_df.copy()

df['Types'] = df[['type_1', 'type_2']].apply(lambda x: tuple(filter(lambda y: pd.notna(y), x)), axis=1)
df.Types = df.Types.astype(str)

# drop the Type 1 and Type 2 columns
df.drop(['type_1', 'type_2'], axis=1, inplace=True)

# print head
df.head()

# Find classes with only one type
singleton_classes = df['Types'].value_counts()[df['Types'].value_counts() == 1].index.tolist()

# Create binary labels for each Pokémon type combination
type_combinations = df['Types'].unique()
for type in type_combinations:
    df[type] = df['Types'].apply(lambda x: 1 if type in x else 0)

singleton_data = df[df['Types'].isin(singleton_classes)]
other_data = df[~df['Types'].isin(singleton_classes)]

print("Number of singleton classes",len(singleton_classes))
print("number of unique type combinations",len(df['Types'].unique()))
print(len(df['Types']))
df.head()

Number of singleton classes 41
number of unique type combinations 192
1044

C:\Users\thors\AppData\Local\Temp\ipykernel_20284\3350647679.py:4: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df[type] = df['Types'].apply(lambda x: 1 if type in x else 0)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\3350647679.py:4: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df[type] = df['Types'].apply(lambda x: 1 if type in x else 0)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\3350647679.py:4: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df[type] = df['Types'].apply(lambda x: 1 if type in x else 0)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\3350647679.py:4: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df[type] = df['Types'].apply(lambda x: 1 if type in x else 0)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\3350647679.py:4: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df[type] = df['Types'].apply(lambda x: 1 if type in x else 0)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\3350647679.py:4: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df[type] = df['Types'].apply(lambda x: 1 if type in x else 0)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\3350647679.py:4: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df[type] = df['Types'].apply(lambda x: 1 if type in x else 0)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\3350647679.py:4: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df[type] = df['Types'].apply(lambda x: 1 if type in x else 0)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\3350647679.py:4: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df[type] = df['Types'].apply(lambda x: 1 if type in x else 0)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\3350647679.py:4: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df[type] = df['Types'].apply(lambda x: 1 if type in x else 0)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\3350647679.py:4: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df[type] = df['Types'].apply(lambda x: 1 if type in x else 0)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\3350647679.py:4: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df[type] = df['Types'].apply(lambda x: 1 if type in x else 0)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\3350647679.py:4: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df[type] = df['Types'].apply(lambda x: 1 if type in x else 0)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\3350647679.py:4: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df[type] = df['Types'].apply(lambda x: 1 if type in x else 0)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\3350647679.py:4: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df[type] = df['Types'].apply(lambda x: 1 if type in x else 0)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\3350647679.py:4: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df[type] = df['Types'].apply(lambda x: 1 if type in x else 0)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\3350647679.py:4: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df[type] = df['Types'].apply(lambda x: 1 if type in x else 0)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\3350647679.py:4: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df[type] = df['Types'].apply(lambda x: 1 if type in x else 0)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\3350647679.py:4: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df[type] = df['Types'].apply(lambda x: 1 if type in x else 0)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\3350647679.py:4: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df[type] = df['Types'].apply(lambda x: 1 if type in x else 0)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\3350647679.py:4: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df[type] = df['Types'].apply(lambda x: 1 if type in x else 0)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\3350647679.py:4: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df[type] = df['Types'].apply(lambda x: 1 if type in x else 0)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\3350647679.py:4: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df[type] = df['Types'].apply(lambda x: 1 if type in x else 0)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\3350647679.py:4: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df[type] = df['Types'].apply(lambda x: 1 if type in x else 0)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\3350647679.py:4: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df[type] = df['Types'].apply(lambda x: 1 if type in x else 0)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\3350647679.py:4: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df[type] = df['Types'].apply(lambda x: 1 if type in x else 0)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\3350647679.py:4: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df[type] = df['Types'].apply(lambda x: 1 if type in x else 0)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\3350647679.py:4: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df[type] = df['Types'].apply(lambda x: 1 if type in x else 0)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\3350647679.py:4: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df[type] = df['Types'].apply(lambda x: 1 if type in x else 0)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\3350647679.py:4: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df[type] = df['Types'].apply(lambda x: 1 if type in x else 0)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\3350647679.py:4: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df[type] = df['Types'].apply(lambda x: 1 if type in x else 0)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\3350647679.py:4: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df[type] = df['Types'].apply(lambda x: 1 if type in x else 0)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\3350647679.py:4: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df[type] = df['Types'].apply(lambda x: 1 if type in x else 0)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\3350647679.py:4: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df[type] = df['Types'].apply(lambda x: 1 if type in x else 0)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\3350647679.py:4: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df[type] = df['Types'].apply(lambda x: 1 if type in x else 0)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\3350647679.py:4: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df[type] = df['Types'].apply(lambda x: 1 if type in x else 0)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\3350647679.py:4: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df[type] = df['Types'].apply(lambda x: 1 if type in x else 0)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\3350647679.py:4: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df[type] = df['Types'].apply(lambda x: 1 if type in x else 0)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\3350647679.py:4: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df[type] = df['Types'].apply(lambda x: 1 if type in x else 0)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\3350647679.py:4: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df[type] = df['Types'].apply(lambda x: 1 if type in x else 0)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\3350647679.py:4: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df[type] = df['Types'].apply(lambda x: 1 if type in x else 0)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\3350647679.py:4: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df[type] = df['Types'].apply(lambda x: 1 if type in x else 0)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\3350647679.py:4: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df[type] = df['Types'].apply(lambda x: 1 if type in x else 0)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\3350647679.py:4: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df[type] = df['Types'].apply(lambda x: 1 if type in x else 0)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\3350647679.py:4: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df[type] = df['Types'].apply(lambda x: 1 if type in x else 0)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\3350647679.py:4: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df[type] = df['Types'].apply(lambda x: 1 if type in x else 0)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\3350647679.py:4: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df[type] = df['Types'].apply(lambda x: 1 if type in x else 0)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\3350647679.py:4: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df[type] = df['Types'].apply(lambda x: 1 if type in x else 0)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\3350647679.py:4: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df[type] = df['Types'].apply(lambda x: 1 if type in x else 0)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\3350647679.py:4: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df[type] = df['Types'].apply(lambda x: 1 if type in x else 0)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\3350647679.py:4: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df[type] = df['Types'].apply(lambda x: 1 if type in x else 0)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\3350647679.py:4: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df[type] = df['Types'].apply(lambda x: 1 if type in x else 0)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\3350647679.py:4: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df[type] = df['Types'].apply(lambda x: 1 if type in x else 0)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\3350647679.py:4: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df[type] = df['Types'].apply(lambda x: 1 if type in x else 0)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\3350647679.py:4: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df[type] = df['Types'].apply(lambda x: 1 if type in x else 0)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\3350647679.py:4: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df[type] = df['Types'].apply(lambda x: 1 if type in x else 0)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\3350647679.py:4: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df[type] = df['Types'].apply(lambda x: 1 if type in x else 0)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\3350647679.py:4: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df[type] = df['Types'].apply(lambda x: 1 if type in x else 0)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\3350647679.py:4: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df[type] = df['Types'].apply(lambda x: 1 if type in x else 0)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\3350647679.py:4: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df[type] = df['Types'].apply(lambda x: 1 if type in x else 0)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\3350647679.py:4: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df[type] = df['Types'].apply(lambda x: 1 if type in x else 0)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\3350647679.py:4: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df[type] = df['Types'].apply(lambda x: 1 if type in x else 0)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\3350647679.py:4: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df[type] = df['Types'].apply(lambda x: 1 if type in x else 0)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\3350647679.py:4: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df[type] = df['Types'].apply(lambda x: 1 if type in x else 0)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\3350647679.py:4: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df[type] = df['Types'].apply(lambda x: 1 if type in x else 0)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\3350647679.py:4: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df[type] = df['Types'].apply(lambda x: 1 if type in x else 0)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\3350647679.py:4: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df[type] = df['Types'].apply(lambda x: 1 if type in x else 0)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\3350647679.py:4: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df[type] = df['Types'].apply(lambda x: 1 if type in x else 0)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\3350647679.py:4: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df[type] = df['Types'].apply(lambda x: 1 if type in x else 0)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\3350647679.py:4: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df[type] = df['Types'].apply(lambda x: 1 if type in x else 0)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\3350647679.py:4: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df[type] = df['Types'].apply(lambda x: 1 if type in x else 0)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\3350647679.py:4: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df[type] = df['Types'].apply(lambda x: 1 if type in x else 0)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\3350647679.py:4: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df[type] = df['Types'].apply(lambda x: 1 if type in x else 0)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\3350647679.py:4: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df[type] = df['Types'].apply(lambda x: 1 if type in x else 0)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\3350647679.py:4: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df[type] = df['Types'].apply(lambda x: 1 if type in x else 0)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\3350647679.py:4: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df[type] = df['Types'].apply(lambda x: 1 if type in x else 0)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\3350647679.py:4: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df[type] = df['Types'].apply(lambda x: 1 if type in x else 0)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\3350647679.py:4: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df[type] = df['Types'].apply(lambda x: 1 if type in x else 0)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\3350647679.py:4: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df[type] = df['Types'].apply(lambda x: 1 if type in x else 0)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\3350647679.py:4: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df[type] = df['Types'].apply(lambda x: 1 if type in x else 0)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\3350647679.py:4: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df[type] = df['Types'].apply(lambda x: 1 if type in x else 0)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\3350647679.py:4: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df[type] = df['Types'].apply(lambda x: 1 if type in x else 0)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\3350647679.py:4: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df[type] = df['Types'].apply(lambda x: 1 if type in x else 0)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\3350647679.py:4: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df[type] = df['Types'].apply(lambda x: 1 if type in x else 0)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\3350647679.py:4: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df[type] = df['Types'].apply(lambda x: 1 if type in x else 0)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\3350647679.py:4: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df[type] = df['Types'].apply(lambda x: 1 if type in x else 0)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\3350647679.py:4: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df[type] = df['Types'].apply(lambda x: 1 if type in x else 0)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\3350647679.py:4: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df[type] = df['Types'].apply(lambda x: 1 if type in x else 0)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\3350647679.py:4: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df[type] = df['Types'].apply(lambda x: 1 if type in x else 0)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\3350647679.py:4: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df[type] = df['Types'].apply(lambda x: 1 if type in x else 0)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\3350647679.py:4: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df[type] = df['Types'].apply(lambda x: 1 if type in x else 0)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\3350647679.py:4: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df[type] = df['Types'].apply(lambda x: 1 if type in x else 0)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\3350647679.py:4: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df[type] = df['Types'].apply(lambda x: 1 if type in x else 0)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\3350647679.py:4: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df[type] = df['Types'].apply(lambda x: 1 if type in x else 0)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\3350647679.py:4: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df[type] = df['Types'].apply(lambda x: 1 if type in x else 0)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\3350647679.py:4: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df[type] = df['Types'].apply(lambda x: 1 if type in x else 0)

# Drop the 'Types' column
df = df.drop(columns=['Types'])
other_data.drop(columns=['Types'], inplace=True)
singleton_data.drop(columns=['Types'], inplace=True)

# Split the data into training and testing sets
y = df[type_combinations]

X_train, X_test, y_train, y_test = train_test_split(other_data.drop(columns=type_combinations), other_data[type_combinations], test_size=0.2, stratify=other_data[type_combinations], random_state=42)
X_train = pd.concat([X_train, singleton_data.drop(columns=type_combinations)])
y_train = pd.concat([y_train, singleton_data[type_combinations]])
X_test = pd.concat([X_test, singleton_data.drop(columns=type_combinations)])
y_test = pd.concat([y_test, singleton_data[type_combinations]])

# Initialize and train the decision tree classifier
model = DecisionTreeClassifier(random_state=42)
model.fit(X_train, y_train)

# Predict labels for the test set
y_pred = model.predict(X_test)
score = model.score(X_test, y_test)

# Calculate accuracy
print("Score: ", score)
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy: ", accuracy)
print(classification_report(y_test, y_pred))

Score:  0.47107438016528924
Accuracy:  0.47107438016528924
              precision    recall  f1-score   support

           0       1.00      0.67      0.80         3
           1       0.60      0.86      0.71         7
           2       0.00      0.00      0.00         2
           3       0.00      0.00      0.00         0
           4       0.45      0.33      0.38        15
           5       0.43      0.75      0.55         4
           6       0.33      0.33      0.33         3
           7       0.25      0.33      0.29         3
           8       0.83      0.83      0.83         6
           9       0.33      0.50      0.40        14
          10       0.00      0.00      0.00         1
          11       0.25      0.67      0.36         3
          12       1.00      0.14      0.25         7
          13       1.00      1.00      1.00         1
          14       0.00      0.00      0.00         4
          15       0.00      0.00      0.00         0
          16       0.00      0.00      0.00         0
          17       0.33      0.25      0.29         4
          18       1.00      0.25      0.40         4
          19       1.00      1.00      1.00         1
          20       1.00      1.00      1.00         1
          21       1.00      1.00      1.00         1
          22       1.00      1.00      1.00         1
          23       0.00      0.00      0.00         1
          24       0.00      0.00      0.00         3
          25       0.00      0.00      0.00         2
          26       0.57      0.67      0.62         6
          27       1.00      1.00      1.00         1
          28       0.38      0.33      0.35         9
          29       0.00      0.00      0.00         1
          30       1.00      1.00      1.00         1
          31       1.00      1.00      1.00         1
          32       0.50      0.50      0.50         2
          33       0.00      0.00      0.00         1
          34       0.00      0.00      0.00         0
          35       0.00      0.00      0.00         1
          36       0.00      0.00      0.00         1
          37       0.00      0.00      0.00         1
          38       0.00      0.00      0.00         1
          39       0.00      0.00      0.00         0
          40       0.00      0.00      0.00         1
          41       0.00      0.00      0.00         0
          42       1.00      1.00      1.00         1
          43       1.00      1.00      1.00         1
          44       0.62      0.89      0.73         9
          45       0.00      0.00      0.00         1
          46       0.00      0.00      0.00         0
          47       1.00      0.50      0.67         2
          48       0.00      0.00      0.00         2
          49       0.00      0.00      0.00         1
          50       1.00      1.00      1.00         1
          51       0.00      0.00      0.00         0
          52       0.00      0.00      0.00         2
          53       0.00      0.00      0.00         1
          54       0.00      0.00      0.00         0
          55       0.00      0.00      0.00         1
          56       0.50      0.67      0.57         3
          57       0.00      0.00      0.00         1
          58       0.00      0.00      0.00         1
          59       0.00      0.00      0.00         0
          60       0.00      0.00      0.00         0
          61       0.00      0.00      0.00         0
          62       1.00      1.00      1.00         1
          63       0.33      0.33      0.33         3
          64       1.00      0.50      0.67         2
          65       0.00      0.00      0.00         2
          66       0.20      0.33      0.25         3
          67       0.00      0.00      0.00         1
          68       0.00      0.00      0.00         2
          69       0.00      0.00      0.00         1
          70       0.00      0.00      0.00         0
          71       0.00      0.00      0.00         1
          72       0.00      0.00      0.00         1
          73       0.00      0.00      0.00         0
          74       1.00      1.00      1.00         1
          75       0.00      0.00      0.00         1
          76       0.00      0.00      0.00         1
          77       0.00      0.00      0.00         0
          78       0.00      0.00      0.00         1
          79       0.00      0.00      0.00         1
          80       0.00      0.00      0.00         0
          81       0.00      0.00      0.00         0
          82       0.00      0.00      0.00         1
          83       1.00      1.00      1.00         1
          84       0.50      1.00      0.67         1
          85       0.00      0.00      0.00         1
          86       0.00      0.00      0.00         1
          87       0.00      0.00      0.00         0
          88       1.00      1.00      1.00         1
          89       0.00      0.00      0.00         0
          90       0.00      0.00      0.00         1
          91       1.00      1.00      1.00         1
          92       0.00      0.00      0.00         1
          93       0.00      0.00      0.00         1
          94       0.00      0.00      0.00         0
          95       0.50      1.00      0.67         1
          96       0.00      0.00      0.00         0
          97       0.00      0.00      0.00         0
          98       0.00      0.00      0.00         0
          99       0.00      0.00      0.00         0
         100       0.00      0.00      0.00         1
         101       0.00      0.00      0.00         2
         102       0.50      1.00      0.67         1
         103       1.00      1.00      1.00         1
         104       1.00      1.00      1.00         1
         105       1.00      1.00      1.00         1
         106       0.33      1.00      0.50         1
         107       0.00      0.00      0.00         1
         108       0.00      0.00      0.00         1
         109       0.00      0.00      0.00         1
         110       0.50      1.00      0.67         1
         111       0.00      0.00      0.00         1
         112       0.00      0.00      0.00         1
         113       1.00      1.00      1.00         1
         114       0.00      0.00      0.00         0
         115       0.00      0.00      0.00         1
         116       1.00      1.00      1.00         1
         117       0.50      1.00      0.67         1
         118       0.00      0.00      0.00         1
         119       0.00      0.00      0.00         1
         120       0.00      0.00      0.00         0
         121       0.00      0.00      0.00         1
         122       0.00      0.00      0.00         0
         123       1.00      1.00      1.00         1
         124       0.00      0.00      0.00         0
         125       0.33      1.00      0.50         1
         126       1.00      1.00      1.00         1
         127       1.00      1.00      1.00         1
         128       0.00      0.00      0.00         0
         129       1.00      1.00      1.00         1
         130       0.00      0.00      0.00         0
         131       0.00      0.00      0.00         1
         132       0.00      0.00      0.00         0
         133       0.00      0.00      0.00         0
         134       0.00      0.00      0.00         1
         135       0.00      0.00      0.00         1
         136       1.00      1.00      1.00         1
         137       1.00      1.00      1.00         1
         138       0.00      0.00      0.00         0
         139       0.00      0.00      0.00         1
         140       0.00      0.00      0.00         0
         141       1.00      1.00      1.00         1
         142       1.00      1.00      1.00         1
         143       0.50      1.00      0.67         1
         144       0.50      1.00      0.67         1
         145       1.00      1.00      1.00         1
         146       0.00      0.00      0.00         1
         147       1.00      1.00      1.00         1
         148       0.00      0.00      0.00         0
         149       0.00      0.00      0.00         0
         150       0.00      0.00      0.00         1
         151       0.00      0.00      0.00         0
         152       0.00      0.00      0.00         1
         153       0.00      0.00      0.00         1
         154       0.00      0.00      0.00         0
         155       0.00      0.00      0.00         0
         156       0.00      0.00      0.00         0
         157       0.00      0.00      0.00         0
         158       0.00      0.00      0.00         1
         159       0.50      0.50      0.50         2
         160       0.00      0.00      0.00         0
         161       0.00      0.00      0.00         1
         162       1.00      1.00      1.00         1
         163       1.00      1.00      1.00         1
         164       1.00      1.00      1.00         1
         165       1.00      1.00      1.00         1
         166       1.00      1.00      1.00         1
         167       0.00      0.00      0.00         0
         168       0.00      0.00      0.00         0
         169       0.00      0.00      0.00         0
         170       0.00      0.00      0.00         0
         171       1.00      1.00      1.00         1
         172       0.50      1.00      0.67         1
         173       0.00      0.00      0.00         0
         174       0.00      0.00      0.00         0
         175       1.00      1.00      1.00         1
         176       1.00      1.00      1.00         1
         177       1.00      1.00      1.00         1
         178       1.00      1.00      1.00         1
         179       0.00      0.00      0.00         0
         180       0.00      0.00      0.00         0
         181       1.00      1.00      1.00         1
         182       0.00      0.00      0.00         1
         183       0.00      0.00      0.00         0
         184       1.00      1.00      1.00         1
         185       0.00      0.00      0.00         0
         186       0.00      0.00      0.00         0
         187       0.00      0.00      0.00         0
         188       1.00      1.00      1.00         1
         189       1.00      1.00      1.00         1
         190       0.00      0.00      0.00         1
         191       1.00      1.00      1.00         1

   micro avg       0.48      0.47      0.48       242
   macro avg       0.33      0.35      0.33       242
weighted avg       0.46      0.47      0.44       242
 samples avg       0.47      0.47      0.47       242

C:\Users\thors\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\LocalCache\local-packages\Python310\site-packages\sklearn\metrics\_classification.py:1497: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
C:\Users\thors\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\LocalCache\local-packages\Python310\site-packages\sklearn\metrics\_classification.py:1497: UndefinedMetricWarning: Recall is ill-defined and being set to 0.0 in labels with no true samples. Use `zero_division` parameter to control this behavior.
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
C:\Users\thors\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\LocalCache\local-packages\Python310\site-packages\sklearn\metrics\_classification.py:1497: UndefinedMetricWarning: F-score is ill-defined and being set to 0.0 in labels with no true nor predicted samples. Use `zero_division` parameter to control this behavior.
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
C:\Users\thors\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\LocalCache\local-packages\Python310\site-packages\sklearn\metrics\_classification.py:1497: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in samples with no predicted labels. Use `zero_division` parameter to control this behavior.
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))

pipeline = make_pipeline(StandardScaler(), DecisionTreeClassifier())

# Setup the parameters
param_dist = {
    "decisiontreeclassifier__max_depth": [15, 30, None],  
    "decisiontreeclassifier__min_samples_leaf": np.arange(1, 10)
}

# Instantiate the GridSearchCV object
grid_search_cv = GridSearchCV(pipeline, param_grid=param_dist, cv=5)

# Fit grid_search_cv using the data X and labels y.
grid_search_cv.fit(X_train, y_train) 
y_pred = grid_search_cv.predict(X_test)

# Print the best score
print("Tuned Model Parameters: {}".format(grid_search_cv.best_params_))
print("Accuracy: {}".format(grid_search_cv.best_estimator_.score(X_test, y_test)))
print(classification_report(y_test, y_pred))

Tuned Model Parameters: {'decisiontreeclassifier__max_depth': None, 'decisiontreeclassifier__min_samples_leaf': 1}
Accuracy: 0.47520661157024796
              precision    recall  f1-score   support

           0       1.00      0.67      0.80         3
           1       0.57      0.57      0.57         7
           2       0.33      0.50      0.40         2
           3       0.00      0.00      0.00         0
           4       0.45      0.33      0.38        15
           5       0.43      0.75      0.55         4
           6       0.50      0.67      0.57         3
           7       0.50      0.33      0.40         3
           8       0.83      0.83      0.83         6
           9       0.35      0.43      0.39        14
          10       0.00      0.00      0.00         1
          11       0.25      0.67      0.36         3
          12       0.50      0.14      0.22         7
          13       1.00      1.00      1.00         1
          14       1.00      0.25      0.40         4
          15       0.00      0.00      0.00         0
          16       0.00      0.00      0.00         0
          17       0.50      0.25      0.33         4
          18       0.50      0.25      0.33         4
          19       1.00      1.00      1.00         1
          20       1.00      1.00      1.00         1
          21       1.00      1.00      1.00         1
          22       1.00      1.00      1.00         1
          23       0.00      0.00      0.00         1
          24       0.00      0.00      0.00         3
          25       0.00      0.00      0.00         2
          26       0.80      0.67      0.73         6
          27       0.25      1.00      0.40         1
          28       0.33      0.22      0.27         9
          29       0.00      0.00      0.00         1
          30       1.00      1.00      1.00         1
          31       1.00      1.00      1.00         1
          32       0.50      0.50      0.50         2
          33       0.00      0.00      0.00         1
          34       0.00      0.00      0.00         0
          35       0.00      0.00      0.00         1
          36       0.00      0.00      0.00         1
          37       0.00      0.00      0.00         1
          38       0.00      0.00      0.00         1
          39       0.00      0.00      0.00         0
          40       0.00      0.00      0.00         1
          41       0.00      0.00      0.00         0
          42       1.00      1.00      1.00         1
          43       1.00      1.00      1.00         1
          44       0.73      0.89      0.80         9
          45       0.00      0.00      0.00         1
          46       0.00      0.00      0.00         0
          47       1.00      0.50      0.67         2
          48       0.00      0.00      0.00         2
          49       0.00      0.00      0.00         1
          50       1.00      1.00      1.00         1
          51       0.00      0.00      0.00         0
          52       0.00      0.00      0.00         2
          53       0.00      0.00      0.00         1
          54       0.00      0.00      0.00         0
          55       0.00      0.00      0.00         1
          56       1.00      0.67      0.80         3
          57       0.00      0.00      0.00         1
          58       0.00      0.00      0.00         1
          59       0.00      0.00      0.00         0
          60       0.00      0.00      0.00         0
          61       0.00      0.00      0.00         0
          62       0.00      0.00      0.00         1
          63       0.50      0.33      0.40         3
          64       1.00      0.50      0.67         2
          65       0.00      0.00      0.00         2
          66       0.67      0.67      0.67         3
          67       0.00      0.00      0.00         1
          68       0.50      0.50      0.50         2
          69       0.00      0.00      0.00         1
          70       0.00      0.00      0.00         0
          71       0.00      0.00      0.00         1
          72       0.00      0.00      0.00         1
          73       0.00      0.00      0.00         0
          74       1.00      1.00      1.00         1
          75       0.00      0.00      0.00         1
          76       0.00      0.00      0.00         1
          77       0.00      0.00      0.00         0
          78       0.00      0.00      0.00         1
          79       0.00      0.00      0.00         1
          80       0.00      0.00      0.00         0
          81       0.00      0.00      0.00         0
          82       0.00      0.00      0.00         1
          83       1.00      1.00      1.00         1
          84       0.00      0.00      0.00         1
          85       0.00      0.00      0.00         1
          86       0.00      0.00      0.00         1
          87       0.00      0.00      0.00         0
          88       1.00      1.00      1.00         1
          89       0.00      0.00      0.00         0
          90       0.00      0.00      0.00         1
          91       1.00      1.00      1.00         1
          92       0.00      0.00      0.00         1
          93       0.00      0.00      0.00         1
          94       0.00      0.00      0.00         0
          95       1.00      1.00      1.00         1
          96       0.00      0.00      0.00         0
          97       0.00      0.00      0.00         0
          98       0.00      0.00      0.00         0
          99       0.00      0.00      0.00         0
         100       0.00      0.00      0.00         1
         101       1.00      0.50      0.67         2
         102       0.50      1.00      0.67         1
         103       1.00      1.00      1.00         1
         104       1.00      1.00      1.00         1
         105       1.00      1.00      1.00         1
         106       0.25      1.00      0.40         1
         107       0.00      0.00      0.00         1
         108       0.00      0.00      0.00         1
         109       0.00      0.00      0.00         1
         110       0.50      1.00      0.67         1
         111       0.00      0.00      0.00         1
         112       0.00      0.00      0.00         1
         113       1.00      1.00      1.00         1
         114       0.00      0.00      0.00         0
         115       0.50      1.00      0.67         1
         116       1.00      1.00      1.00         1
         117       0.50      1.00      0.67         1
         118       0.00      0.00      0.00         1
         119       0.00      0.00      0.00         1
         120       0.00      0.00      0.00         0
         121       0.00      0.00      0.00         1
         122       0.00      0.00      0.00         0
         123       1.00      1.00      1.00         1
         124       0.00      0.00      0.00         0
         125       1.00      1.00      1.00         1
         126       1.00      1.00      1.00         1
         127       1.00      1.00      1.00         1
         128       0.00      0.00      0.00         0
         129       1.00      1.00      1.00         1
         130       0.00      0.00      0.00         0
         131       0.00      0.00      0.00         1
         132       0.00      0.00      0.00         0
         133       0.00      0.00      0.00         0
         134       0.00      0.00      0.00         1
         135       0.00      0.00      0.00         1
         136       1.00      1.00      1.00         1
         137       1.00      1.00      1.00         1
         138       0.00      0.00      0.00         0
         139       0.00      0.00      0.00         1
         140       0.00      0.00      0.00         0
         141       1.00      1.00      1.00         1
         142       1.00      1.00      1.00         1
         143       0.50      1.00      0.67         1
         144       1.00      1.00      1.00         1
         145       1.00      1.00      1.00         1
         146       0.00      0.00      0.00         1
         147       0.50      1.00      0.67         1
         148       0.00      0.00      0.00         0
         149       0.00      0.00      0.00         0
         150       0.00      0.00      0.00         1
         151       0.00      0.00      0.00         0
         152       0.00      0.00      0.00         1
         153       0.00      0.00      0.00         1
         154       0.00      0.00      0.00         0
         155       0.00      0.00      0.00         0
         156       0.00      0.00      0.00         0
         157       0.00      0.00      0.00         0
         158       0.00      0.00      0.00         1
         159       0.50      0.50      0.50         2
         160       0.00      0.00      0.00         0
         161       0.00      0.00      0.00         1
         162       1.00      1.00      1.00         1
         163       0.50      1.00      0.67         1
         164       1.00      1.00      1.00         1
         165       0.50      1.00      0.67         1
         166       0.50      1.00      0.67         1
         167       0.00      0.00      0.00         0
         168       0.00      0.00      0.00         0
         169       0.00      0.00      0.00         0
         170       0.00      0.00      0.00         0
         171       0.50      1.00      0.67         1
         172       0.25      1.00      0.40         1
         173       0.00      0.00      0.00         0
         174       0.00      0.00      0.00         0
         175       1.00      1.00      1.00         1
         176       1.00      1.00      1.00         1
         177       1.00      1.00      1.00         1
         178       1.00      1.00      1.00         1
         179       0.00      0.00      0.00         0
         180       0.00      0.00      0.00         0
         181       1.00      1.00      1.00         1
         182       0.00      0.00      0.00         1
         183       0.00      0.00      0.00         0
         184       1.00      1.00      1.00         1
         185       0.00      0.00      0.00         0
         186       0.00      0.00      0.00         0
         187       0.00      0.00      0.00         0
         188       1.00      1.00      1.00         1
         189       1.00      1.00      1.00         1
         190       0.00      0.00      0.00         1
         191       1.00      1.00      1.00         1

   micro avg       0.49      0.48      0.48       242
   macro avg       0.33      0.35      0.33       242
weighted avg       0.49      0.48      0.46       242
 samples avg       0.48      0.48      0.48       242

C:\Users\thors\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\LocalCache\local-packages\Python310\site-packages\sklearn\metrics\_classification.py:1497: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
C:\Users\thors\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\LocalCache\local-packages\Python310\site-packages\sklearn\metrics\_classification.py:1497: UndefinedMetricWarning: Recall is ill-defined and being set to 0.0 in labels with no true samples. Use `zero_division` parameter to control this behavior.
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
C:\Users\thors\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\LocalCache\local-packages\Python310\site-packages\sklearn\metrics\_classification.py:1497: UndefinedMetricWarning: F-score is ill-defined and being set to 0.0 in labels with no true nor predicted samples. Use `zero_division` parameter to control this behavior.
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
C:\Users\thors\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\LocalCache\local-packages\Python310\site-packages\sklearn\metrics\_classification.py:1497: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in samples with no predicted labels. Use `zero_division` parameter to control this behavior.
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))

from sklearn.ensemble import RandomForestClassifier

model = RandomForestClassifier(random_state=42)

model.fit(X_train, y_train)

# Predict labels for the test set
y_pred = model.predict(X_test)
score = model.score(X_test, y_test)
# Calculate accuracy
print("Score: ", score)
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)
print(classification_report(y_test, y_pred))

Score:  0.25206611570247933
Accuracy: 0.25206611570247933
              precision    recall  f1-score   support

           0       1.00      0.33      0.50         3
           1       1.00      0.43      0.60         7
           2       0.00      0.00      0.00         2
           3       0.00      0.00      0.00         0
           4       0.80      0.27      0.40        15
           5       1.00      0.25      0.40         4
           6       0.00      0.00      0.00         3
           7       0.00      0.00      0.00         3
           8       0.33      0.17      0.22         6
           9       0.50      0.14      0.22        14
          10       0.00      0.00      0.00         1
          11       1.00      0.67      0.80         3
          12       0.00      0.00      0.00         7
          13       1.00      1.00      1.00         1
          14       0.00      0.00      0.00         4
          15       0.00      0.00      0.00         0
          16       0.00      0.00      0.00         0
          17       1.00      0.25      0.40         4
          18       0.00      0.00      0.00         4
          19       1.00      1.00      1.00         1
          20       0.00      0.00      0.00         1
          21       0.00      0.00      0.00         1
          22       0.00      0.00      0.00         1
          23       0.00      0.00      0.00         1
          24       0.00      0.00      0.00         3
          25       0.00      0.00      0.00         2
          26       1.00      0.33      0.50         6
          27       1.00      1.00      1.00         1
          28       0.00      0.00      0.00         9
          29       0.00      0.00      0.00         1
          30       0.00      0.00      0.00         1
          31       0.00      0.00      0.00         1
          32       0.00      0.00      0.00         2
          33       0.00      0.00      0.00         1
          34       0.00      0.00      0.00         0
          35       0.00      0.00      0.00         1
          36       0.00      0.00      0.00         1
          37       0.00      0.00      0.00         1
          38       0.00      0.00      0.00         1
          39       0.00      0.00      0.00         0
          40       0.00      0.00      0.00         1
          41       0.00      0.00      0.00         0
          42       1.00      1.00      1.00         1
          43       1.00      1.00      1.00         1
          44       0.67      0.22      0.33         9
          45       0.00      0.00      0.00         1
          46       0.00      0.00      0.00         0
          47       0.00      0.00      0.00         2
          48       0.00      0.00      0.00         2
          49       0.00      0.00      0.00         1
          50       0.00      0.00      0.00         1
          51       0.00      0.00      0.00         0
          52       0.00      0.00      0.00         2
          53       0.00      0.00      0.00         1
          54       0.00      0.00      0.00         0
          55       0.00      0.00      0.00         1
          56       1.00      0.33      0.50         3
          57       0.00      0.00      0.00         1
          58       0.00      0.00      0.00         1
          59       0.00      0.00      0.00         0
          60       0.00      0.00      0.00         0
          61       0.00      0.00      0.00         0
          62       0.00      0.00      0.00         1
          63       0.00      0.00      0.00         3
          64       0.00      0.00      0.00         2
          65       0.00      0.00      0.00         2
          66       0.00      0.00      0.00         3
          67       0.00      0.00      0.00         1
          68       0.00      0.00      0.00         2
          69       0.00      0.00      0.00         1
          70       0.00      0.00      0.00         0
          71       0.00      0.00      0.00         1
          72       0.00      0.00      0.00         1
          73       0.00      0.00      0.00         0
          74       1.00      1.00      1.00         1
          75       0.00      0.00      0.00         1
          76       0.00      0.00      0.00         1
          77       0.00      0.00      0.00         0
          78       0.00      0.00      0.00         1
          79       0.00      0.00      0.00         1
          80       0.00      0.00      0.00         0
          81       0.00      0.00      0.00         0
          82       0.00      0.00      0.00         1
          83       0.00      0.00      0.00         1
          84       0.00      0.00      0.00         1
          85       0.00      0.00      0.00         1
          86       0.00      0.00      0.00         1
          87       0.00      0.00      0.00         0
          88       1.00      1.00      1.00         1
          89       0.00      0.00      0.00         0
          90       0.00      0.00      0.00         1
          91       1.00      1.00      1.00         1
          92       0.00      0.00      0.00         1
          93       0.00      0.00      0.00         1
          94       0.00      0.00      0.00         0
          95       1.00      1.00      1.00         1
          96       0.00      0.00      0.00         0
          97       0.00      0.00      0.00         0
          98       0.00      0.00      0.00         0
          99       0.00      0.00      0.00         0
         100       0.00      0.00      0.00         1
         101       0.00      0.00      0.00         2
         102       0.00      0.00      0.00         1
         103       1.00      1.00      1.00         1
         104       1.00      1.00      1.00         1
         105       1.00      1.00      1.00         1
         106       1.00      1.00      1.00         1
         107       0.00      0.00      0.00         1
         108       0.00      0.00      0.00         1
         109       0.00      0.00      0.00         1
         110       1.00      1.00      1.00         1
         111       0.00      0.00      0.00         1
         112       0.00      0.00      0.00         1
         113       1.00      1.00      1.00         1
         114       0.00      0.00      0.00         0
         115       0.00      0.00      0.00         1
         116       1.00      1.00      1.00         1
         117       1.00      1.00      1.00         1
         118       0.00      0.00      0.00         1
         119       0.00      0.00      0.00         1
         120       0.00      0.00      0.00         0
         121       0.00      0.00      0.00         1
         122       0.00      0.00      0.00         0
         123       1.00      1.00      1.00         1
         124       0.00      0.00      0.00         0
         125       1.00      1.00      1.00         1
         126       0.00      0.00      0.00         1
         127       0.00      0.00      0.00         1
         128       0.00      0.00      0.00         0
         129       1.00      1.00      1.00         1
         130       0.00      0.00      0.00         0
         131       0.00      0.00      0.00         1
         132       0.00      0.00      0.00         0
         133       0.00      0.00      0.00         0
         134       0.00      0.00      0.00         1
         135       0.00      0.00      0.00         1
         136       0.00      0.00      0.00         1
         137       1.00      1.00      1.00         1
         138       0.00      0.00      0.00         0
         139       0.00      0.00      0.00         1
         140       0.00      0.00      0.00         0
         141       1.00      1.00      1.00         1
         142       1.00      1.00      1.00         1
         143       0.00      0.00      0.00         1
         144       1.00      1.00      1.00         1
         145       1.00      1.00      1.00         1
         146       0.00      0.00      0.00         1
         147       1.00      1.00      1.00         1
         148       0.00      0.00      0.00         0
         149       0.00      0.00      0.00         0
         150       0.00      0.00      0.00         1
         151       0.00      0.00      0.00         0
         152       0.00      0.00      0.00         1
         153       0.00      0.00      0.00         1
         154       0.00      0.00      0.00         0
         155       0.00      0.00      0.00         0
         156       0.00      0.00      0.00         0
         157       0.00      0.00      0.00         0
         158       0.00      0.00      0.00         1
         159       1.00      0.50      0.67         2
         160       0.00      0.00      0.00         0
         161       0.00      0.00      0.00         1
         162       1.00      1.00      1.00         1
         163       1.00      1.00      1.00         1
         164       1.00      1.00      1.00         1
         165       1.00      1.00      1.00         1
         166       1.00      1.00      1.00         1
         167       0.00      0.00      0.00         0
         168       0.00      0.00      0.00         0
         169       0.00      0.00      0.00         0
         170       0.00      0.00      0.00         0
         171       1.00      1.00      1.00         1
         172       1.00      1.00      1.00         1
         173       0.00      0.00      0.00         0
         174       0.00      0.00      0.00         0
         175       1.00      1.00      1.00         1
         176       1.00      1.00      1.00         1
         177       1.00      1.00      1.00         1
         178       1.00      1.00      1.00         1
         179       0.00      0.00      0.00         0
         180       0.00      0.00      0.00         0
         181       1.00      1.00      1.00         1
         182       0.00      0.00      0.00         1
         183       0.00      0.00      0.00         0
         184       0.00      0.00      0.00         1
         185       0.00      0.00      0.00         0
         186       0.00      0.00      0.00         0
         187       0.00      0.00      0.00         0
         188       0.00      0.00      0.00         1
         189       1.00      1.00      1.00         1
         190       0.00      0.00      0.00         1
         191       1.00      1.00      1.00         1

   micro avg       0.86      0.25      0.39       242
   macro avg       0.26      0.23      0.24       242
weighted avg       0.41      0.25      0.29       242
 samples avg       0.25      0.25      0.25       242

C:\Users\thors\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\LocalCache\local-packages\Python310\site-packages\sklearn\metrics\_classification.py:1497: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
C:\Users\thors\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\LocalCache\local-packages\Python310\site-packages\sklearn\metrics\_classification.py:1497: UndefinedMetricWarning: Recall is ill-defined and being set to 0.0 in labels with no true samples. Use `zero_division` parameter to control this behavior.
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
C:\Users\thors\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\LocalCache\local-packages\Python310\site-packages\sklearn\metrics\_classification.py:1497: UndefinedMetricWarning: F-score is ill-defined and being set to 0.0 in labels with no true nor predicted samples. Use `zero_division` parameter to control this behavior.
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
C:\Users\thors\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\LocalCache\local-packages\Python310\site-packages\sklearn\metrics\_classification.py:1497: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in samples with no predicted labels. Use `zero_division` parameter to control this behavior.
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))

from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import RandomizedSearchCV
pipeline = make_pipeline(StandardScaler(), RandomForestClassifier())

# Setup the parameters and distributions to sample from: param_dist
param_dist = {
    "randomforestclassifier__max_depth": [15, 30, None],  
    "randomforestclassifier__min_samples_leaf": np.arange(1, 10, 4),
    "randomforestclassifier__n_estimators": np.arange(80, 120, 5)
}

# Instantiate the RandomizedSearchCV object: random_grid_search_cv
random_search_cv = RandomizedSearchCV(pipeline, param_distributions=param_dist, n_iter=50, cv=3, random_state=42)
#grid_search_cv = GridSearchCV(pipeline, param_grid=param_dist, cv=3)

# Fit random_search_cv using the data X and labels y
random_search_cv.fit(X_train, y_train)
#grid_search_cv.fit(X_train, y_train)

# Print the best score
print("Best score is {}".format(random_search_cv.best_estimator_.score(X_test, y_test)))
print("Best parameters are {}".format(random_search_cv.best_params_))
print(classification_report(y_test, y_pred))

Best score is 0.256198347107438
Best parameters are {'randomforestclassifier__n_estimators': 85, 'randomforestclassifier__min_samples_leaf': 1, 'randomforestclassifier__max_depth': None}
              precision    recall  f1-score   support

           0       1.00      0.33      0.50         3
           1       1.00      0.43      0.60         7
           2       0.00      0.00      0.00         2
           3       0.00      0.00      0.00         0
           4       0.80      0.27      0.40        15
           5       1.00      0.25      0.40         4
           6       0.00      0.00      0.00         3
           7       0.00      0.00      0.00         3
           8       0.33      0.17      0.22         6
           9       0.50      0.14      0.22        14
          10       0.00      0.00      0.00         1
          11       1.00      0.67      0.80         3
          12       0.00      0.00      0.00         7
          13       1.00      1.00      1.00         1
          14       0.00      0.00      0.00         4
          15       0.00      0.00      0.00         0
          16       0.00      0.00      0.00         0
          17       1.00      0.25      0.40         4
          18       0.00      0.00      0.00         4
          19       1.00      1.00      1.00         1
          20       0.00      0.00      0.00         1
          21       0.00      0.00      0.00         1
          22       0.00      0.00      0.00         1
          23       0.00      0.00      0.00         1
          24       0.00      0.00      0.00         3
          25       0.00      0.00      0.00         2
          26       1.00      0.33      0.50         6
          27       1.00      1.00      1.00         1
          28       0.00      0.00      0.00         9
          29       0.00      0.00      0.00         1
          30       0.00      0.00      0.00         1
          31       0.00      0.00      0.00         1
          32       0.00      0.00      0.00         2
          33       0.00      0.00      0.00         1
          34       0.00      0.00      0.00         0
          35       0.00      0.00      0.00         1
          36       0.00      0.00      0.00         1
          37       0.00      0.00      0.00         1
          38       0.00      0.00      0.00         1
          39       0.00      0.00      0.00         0
          40       0.00      0.00      0.00         1
          41       0.00      0.00      0.00         0
          42       1.00      1.00      1.00         1
          43       1.00      1.00      1.00         1
          44       0.67      0.22      0.33         9
          45       0.00      0.00      0.00         1
          46       0.00      0.00      0.00         0
          47       0.00      0.00      0.00         2
          48       0.00      0.00      0.00         2
          49       0.00      0.00      0.00         1
          50       0.00      0.00      0.00         1
          51       0.00      0.00      0.00         0
          52       0.00      0.00      0.00         2
          53       0.00      0.00      0.00         1
          54       0.00      0.00      0.00         0
          55       0.00      0.00      0.00         1
          56       1.00      0.33      0.50         3
          57       0.00      0.00      0.00         1
          58       0.00      0.00      0.00         1
          59       0.00      0.00      0.00         0
          60       0.00      0.00      0.00         0
          61       0.00      0.00      0.00         0
          62       0.00      0.00      0.00         1
          63       0.00      0.00      0.00         3
          64       0.00      0.00      0.00         2
          65       0.00      0.00      0.00         2
          66       0.00      0.00      0.00         3
          67       0.00      0.00      0.00         1
          68       0.00      0.00      0.00         2
          69       0.00      0.00      0.00         1
          70       0.00      0.00      0.00         0
          71       0.00      0.00      0.00         1
          72       0.00      0.00      0.00         1
          73       0.00      0.00      0.00         0
          74       1.00      1.00      1.00         1
          75       0.00      0.00      0.00         1
          76       0.00      0.00      0.00         1
          77       0.00      0.00      0.00         0
          78       0.00      0.00      0.00         1
          79       0.00      0.00      0.00         1
          80       0.00      0.00      0.00         0
          81       0.00      0.00      0.00         0
          82       0.00      0.00      0.00         1
          83       0.00      0.00      0.00         1
          84       0.00      0.00      0.00         1
          85       0.00      0.00      0.00         1
          86       0.00      0.00      0.00         1
          87       0.00      0.00      0.00         0
          88       1.00      1.00      1.00         1
          89       0.00      0.00      0.00         0
          90       0.00      0.00      0.00         1
          91       1.00      1.00      1.00         1
          92       0.00      0.00      0.00         1
          93       0.00      0.00      0.00         1
          94       0.00      0.00      0.00         0
          95       1.00      1.00      1.00         1
          96       0.00      0.00      0.00         0
          97       0.00      0.00      0.00         0
          98       0.00      0.00      0.00         0
          99       0.00      0.00      0.00         0
         100       0.00      0.00      0.00         1
         101       0.00      0.00      0.00         2
         102       0.00      0.00      0.00         1
         103       1.00      1.00      1.00         1
         104       1.00      1.00      1.00         1
         105       1.00      1.00      1.00         1
         106       1.00      1.00      1.00         1
         107       0.00      0.00      0.00         1
         108       0.00      0.00      0.00         1
         109       0.00      0.00      0.00         1
         110       1.00      1.00      1.00         1
         111       0.00      0.00      0.00         1
         112       0.00      0.00      0.00         1
         113       1.00      1.00      1.00         1
         114       0.00      0.00      0.00         0
         115       0.00      0.00      0.00         1
         116       1.00      1.00      1.00         1
         117       1.00      1.00      1.00         1
         118       0.00      0.00      0.00         1
         119       0.00      0.00      0.00         1
         120       0.00      0.00      0.00         0
         121       0.00      0.00      0.00         1
         122       0.00      0.00      0.00         0
         123       1.00      1.00      1.00         1
         124       0.00      0.00      0.00         0
         125       1.00      1.00      1.00         1
         126       0.00      0.00      0.00         1
         127       0.00      0.00      0.00         1
         128       0.00      0.00      0.00         0
         129       1.00      1.00      1.00         1
         130       0.00      0.00      0.00         0
         131       0.00      0.00      0.00         1
         132       0.00      0.00      0.00         0
         133       0.00      0.00      0.00         0
         134       0.00      0.00      0.00         1
         135       0.00      0.00      0.00         1
         136       0.00      0.00      0.00         1
         137       1.00      1.00      1.00         1
         138       0.00      0.00      0.00         0
         139       0.00      0.00      0.00         1
         140       0.00      0.00      0.00         0
         141       1.00      1.00      1.00         1
         142       1.00      1.00      1.00         1
         143       0.00      0.00      0.00         1
         144       1.00      1.00      1.00         1
         145       1.00      1.00      1.00         1
         146       0.00      0.00      0.00         1
         147       1.00      1.00      1.00         1
         148       0.00      0.00      0.00         0
         149       0.00      0.00      0.00         0
         150       0.00      0.00      0.00         1
         151       0.00      0.00      0.00         0
         152       0.00      0.00      0.00         1
         153       0.00      0.00      0.00         1
         154       0.00      0.00      0.00         0
         155       0.00      0.00      0.00         0
         156       0.00      0.00      0.00         0
         157       0.00      0.00      0.00         0
         158       0.00      0.00      0.00         1
         159       1.00      0.50      0.67         2
         160       0.00      0.00      0.00         0
         161       0.00      0.00      0.00         1
         162       1.00      1.00      1.00         1
         163       1.00      1.00      1.00         1
         164       1.00      1.00      1.00         1
         165       1.00      1.00      1.00         1
         166       1.00      1.00      1.00         1
         167       0.00      0.00      0.00         0
         168       0.00      0.00      0.00         0
         169       0.00      0.00      0.00         0
         170       0.00      0.00      0.00         0
         171       1.00      1.00      1.00         1
         172       1.00      1.00      1.00         1
         173       0.00      0.00      0.00         0
         174       0.00      0.00      0.00         0
         175       1.00      1.00      1.00         1
         176       1.00      1.00      1.00         1
         177       1.00      1.00      1.00         1
         178       1.00      1.00      1.00         1
         179       0.00      0.00      0.00         0
         180       0.00      0.00      0.00         0
         181       1.00      1.00      1.00         1
         182       0.00      0.00      0.00         1
         183       0.00      0.00      0.00         0
         184       0.00      0.00      0.00         1
         185       0.00      0.00      0.00         0
         186       0.00      0.00      0.00         0
         187       0.00      0.00      0.00         0
         188       0.00      0.00      0.00         1
         189       1.00      1.00      1.00         1
         190       0.00      0.00      0.00         1
         191       1.00      1.00      1.00         1

   micro avg       0.86      0.25      0.39       242
   macro avg       0.26      0.23      0.24       242
weighted avg       0.41      0.25      0.29       242
 samples avg       0.25      0.25      0.25       242

C:\Users\thors\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\LocalCache\local-packages\Python310\site-packages\sklearn\metrics\_classification.py:1497: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
C:\Users\thors\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\LocalCache\local-packages\Python310\site-packages\sklearn\metrics\_classification.py:1497: UndefinedMetricWarning: Recall is ill-defined and being set to 0.0 in labels with no true samples. Use `zero_division` parameter to control this behavior.
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
C:\Users\thors\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\LocalCache\local-packages\Python310\site-packages\sklearn\metrics\_classification.py:1497: UndefinedMetricWarning: F-score is ill-defined and being set to 0.0 in labels with no true nor predicted samples. Use `zero_division` parameter to control this behavior.
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
C:\Users\thors\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\LocalCache\local-packages\Python310\site-packages\sklearn\metrics\_classification.py:1497: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in samples with no predicted labels. Use `zero_division` parameter to control this behavior.
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))

from sklearn.neighbors import KNeighborsClassifier
model = KNeighborsClassifier()
model.fit(X_train, y_train)

y_pred = model.predict(X_test)

# Calculate accuracy
print("Score: ", model.score(X_test, y_test))
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)
print(classification_report(y_test, y_pred))

Score:  0.03305785123966942
Accuracy: 0.03305785123966942
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         3
           1       1.00      0.29      0.44         7
           2       0.00      0.00      0.00         2
           3       0.00      0.00      0.00         0
           4       0.00      0.00      0.00        15
           5       1.00      0.50      0.67         4
           6       0.00      0.00      0.00         3
           7       0.00      0.00      0.00         3
           8       0.00      0.00      0.00         6
           9       0.75      0.21      0.33        14
          10       0.00      0.00      0.00         1
          11       0.00      0.00      0.00         3
          12       0.00      0.00      0.00         7
          13       0.00      0.00      0.00         1
          14       0.00      0.00      0.00         4
          15       0.00      0.00      0.00         0
          16       0.00      0.00      0.00         0
          17       0.00      0.00      0.00         4
          18       0.00      0.00      0.00         4
          19       0.00      0.00      0.00         1
          20       0.00      0.00      0.00         1
          21       0.00      0.00      0.00         1
          22       0.00      0.00      0.00         1
          23       0.00      0.00      0.00         1
          24       0.00      0.00      0.00         3
          25       0.00      0.00      0.00         2
          26       0.00      0.00      0.00         6
          27       0.00      0.00      0.00         1
          28       0.00      0.00      0.00         9
          29       0.00      0.00      0.00         1
          30       0.00      0.00      0.00         1
          31       0.00      0.00      0.00         1
          32       0.00      0.00      0.00         2
          33       0.00      0.00      0.00         1
          34       0.00      0.00      0.00         0
          35       0.00      0.00      0.00         1
          36       0.00      0.00      0.00         1
          37       0.00      0.00      0.00         1
          38       0.00      0.00      0.00         1
          39       0.00      0.00      0.00         0
          40       0.00      0.00      0.00         1
          41       0.00      0.00      0.00         0
          42       0.00      0.00      0.00         1
          43       0.00      0.00      0.00         1
          44       0.00      0.00      0.00         9
          45       0.00      0.00      0.00         1
          46       0.00      0.00      0.00         0
          47       0.00      0.00      0.00         2
          48       0.00      0.00      0.00         2
          49       0.00      0.00      0.00         1
          50       0.00      0.00      0.00         1
          51       0.00      0.00      0.00         0
          52       0.00      0.00      0.00         2
          53       0.00      0.00      0.00         1
          54       0.00      0.00      0.00         0
          55       0.00      0.00      0.00         1
          56       0.00      0.00      0.00         3
          57       0.00      0.00      0.00         1
          58       0.00      0.00      0.00         1
          59       0.00      0.00      0.00         0
          60       0.00      0.00      0.00         0
          61       0.00      0.00      0.00         0
          62       0.00      0.00      0.00         1
          63       0.00      0.00      0.00         3
          64       0.00      0.00      0.00         2
          65       0.00      0.00      0.00         2
          66       0.00      0.00      0.00         3
          67       0.00      0.00      0.00         1
          68       0.00      0.00      0.00         2
          69       0.00      0.00      0.00         1
          70       0.00      0.00      0.00         0
          71       0.00      0.00      0.00         1
          72       0.00      0.00      0.00         1
          73       0.00      0.00      0.00         0
          74       0.00      0.00      0.00         1
          75       0.00      0.00      0.00         1
          76       0.00      0.00      0.00         1
          77       0.00      0.00      0.00         0
          78       0.00      0.00      0.00         1
          79       0.00      0.00      0.00         1
          80       0.00      0.00      0.00         0
          81       0.00      0.00      0.00         0
          82       0.00      0.00      0.00         1
          83       0.00      0.00      0.00         1
          84       0.00      0.00      0.00         1
          85       0.00      0.00      0.00         1
          86       0.00      0.00      0.00         1
          87       0.00      0.00      0.00         0
          88       0.00      0.00      0.00         1
          89       0.00      0.00      0.00         0
          90       0.00      0.00      0.00         1
          91       0.00      0.00      0.00         1
          92       0.00      0.00      0.00         1
          93       0.00      0.00      0.00         1
          94       0.00      0.00      0.00         0
          95       0.00      0.00      0.00         1
          96       0.00      0.00      0.00         0
          97       0.00      0.00      0.00         0
          98       0.00      0.00      0.00         0
          99       0.00      0.00      0.00         0
         100       0.00      0.00      0.00         1
         101       0.00      0.00      0.00         2
         102       0.00      0.00      0.00         1
         103       0.00      0.00      0.00         1
         104       0.00      0.00      0.00         1
         105       0.00      0.00      0.00         1
         106       0.00      0.00      0.00         1
         107       0.00      0.00      0.00         1
         108       0.00      0.00      0.00         1
         109       0.00      0.00      0.00         1
         110       0.00      0.00      0.00         1
         111       0.00      0.00      0.00         1
         112       0.00      0.00      0.00         1
         113       0.00      0.00      0.00         1
         114       0.00      0.00      0.00         0
         115       0.00      0.00      0.00         1
         116       0.00      0.00      0.00         1
         117       0.00      0.00      0.00         1
         118       0.00      0.00      0.00         1
         119       0.00      0.00      0.00         1
         120       0.00      0.00      0.00         0
         121       0.00      0.00      0.00         1
         122       0.00      0.00      0.00         0
         123       0.00      0.00      0.00         1
         124       0.00      0.00      0.00         0
         125       0.00      0.00      0.00         1
         126       0.00      0.00      0.00         1
         127       0.00      0.00      0.00         1
         128       0.00      0.00      0.00         0
         129       0.00      0.00      0.00         1
         130       0.00      0.00      0.00         0
         131       0.00      0.00      0.00         1
         132       0.00      0.00      0.00         0
         133       0.00      0.00      0.00         0
         134       0.00      0.00      0.00         1
         135       0.00      0.00      0.00         1
         136       0.00      0.00      0.00         1
         137       0.00      0.00      0.00         1
         138       0.00      0.00      0.00         0
         139       0.00      0.00      0.00         1
         140       0.00      0.00      0.00         0
         141       0.00      0.00      0.00         1
         142       0.00      0.00      0.00         1
         143       0.00      0.00      0.00         1
         144       0.00      0.00      0.00         1
         145       0.00      0.00      0.00         1
         146       0.00      0.00      0.00         1
         147       0.00      0.00      0.00         1
         148       0.00      0.00      0.00         0
         149       0.00      0.00      0.00         0
         150       0.00      0.00      0.00         1
         151       0.00      0.00      0.00         0
         152       0.00      0.00      0.00         1
         153       0.00      0.00      0.00         1
         154       0.00      0.00      0.00         0
         155       0.00      0.00      0.00         0
         156       0.00      0.00      0.00         0
         157       0.00      0.00      0.00         0
         158       0.00      0.00      0.00         1
         159       0.50      0.50      0.50         2
         160       0.00      0.00      0.00         0
         161       0.00      0.00      0.00         1
         162       0.00      0.00      0.00         1
         163       0.00      0.00      0.00         1
         164       0.00      0.00      0.00         1
         165       0.00      0.00      0.00         1
         166       0.00      0.00      0.00         1
         167       0.00      0.00      0.00         0
         168       0.00      0.00      0.00         0
         169       0.00      0.00      0.00         0
         170       0.00      0.00      0.00         0
         171       0.00      0.00      0.00         1
         172       0.00      0.00      0.00         1
         173       0.00      0.00      0.00         0
         174       0.00      0.00      0.00         0
         175       0.00      0.00      0.00         1
         176       0.00      0.00      0.00         1
         177       0.00      0.00      0.00         1
         178       0.00      0.00      0.00         1
         179       0.00      0.00      0.00         0
         180       0.00      0.00      0.00         0
         181       0.00      0.00      0.00         1
         182       0.00      0.00      0.00         1
         183       0.00      0.00      0.00         0
         184       0.00      0.00      0.00         1
         185       0.00      0.00      0.00         0
         186       0.00      0.00      0.00         0
         187       0.00      0.00      0.00         0
         188       0.00      0.00      0.00         1
         189       0.00      0.00      0.00         1
         190       0.00      0.00      0.00         1
         191       0.00      0.00      0.00         1

   micro avg       0.44      0.03      0.06       242
   macro avg       0.02      0.01      0.01       242
weighted avg       0.09      0.03      0.05       242
 samples avg       0.03      0.03      0.03       242

C:\Users\thors\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\LocalCache\local-packages\Python310\site-packages\sklearn\metrics\_classification.py:1497: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
C:\Users\thors\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\LocalCache\local-packages\Python310\site-packages\sklearn\metrics\_classification.py:1497: UndefinedMetricWarning: Recall is ill-defined and being set to 0.0 in labels with no true samples. Use `zero_division` parameter to control this behavior.
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
C:\Users\thors\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\LocalCache\local-packages\Python310\site-packages\sklearn\metrics\_classification.py:1497: UndefinedMetricWarning: F-score is ill-defined and being set to 0.0 in labels with no true nor predicted samples. Use `zero_division` parameter to control this behavior.
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
C:\Users\thors\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\LocalCache\local-packages\Python310\site-packages\sklearn\metrics\_classification.py:1497: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in samples with no predicted labels. Use `zero_division` parameter to control this behavior.
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))

param_grid = {
    'kneighborsclassifier__n_neighbors': [3, 5, 7, 9]  # List of k values to try
}

pipeline = make_pipeline(StandardScaler(), KNeighborsClassifier())

grid_search = GridSearchCV(estimator=pipeline, param_grid=param_grid, cv=5)

grid_search.fit(X_train, y_train)

best_params = grid_search.best_params_
best_score=grid_search.best_estimator_.score(X_test, y_test)

print("Best Parameters:", best_params)
print("Best Score:", best_score)
print(classification_report(y_test, y_pred))

Best Parameters: {'kneighborsclassifier__n_neighbors': 3}
Best Score: 0.2231404958677686

df  = preprocessed_df.copy()

# Combine Type 1 and Type 2 into a single column
df['Types'] = df[['type_1', 'type_2']].apply(lambda x: tuple(filter(lambda y: pd.notna(y), x)), axis=1)
print(df['Types'][0])

# Get unique Pokémon types
unique_types = np.unique(df['Types'].explode())
df.drop(['type_1', 'type_2'], axis=1, inplace=True)

df.head()

('Grass', 'Poison')

# Create binary labels for each Pokémon type
for type in unique_types:
    df[type] = df['Types'].apply(lambda x: 1 if type in x else 0)

df.head()

# Some type combinations only occur once so we double them to stratify the data better
singleton_classes = df['Types'].value_counts()[df['Types'].value_counts() == 1].index.tolist()
singleton_data = df[df['Types'].isin(singleton_classes)]
other_data = df[~df['Types'].isin(singleton_classes)]

df = df.drop(columns=['Types'])
other_data.drop(columns=['Types'], inplace=True)
singleton_data.drop(columns=['Types'], inplace=True)

# Split the data into training and testing sets
y = df[unique_types]
X_train, X_test, y_train, y_test = train_test_split(other_data.drop(columns=unique_types), other_data[unique_types], test_size=0.2, stratify=other_data[unique_types], random_state=42)
X_train = pd.concat([X_train, singleton_data.drop(columns=unique_types)])
y_train = pd.concat([y_train, singleton_data[unique_types]])
X_test = pd.concat([X_test, singleton_data.drop(columns=unique_types)])
y_test = pd.concat([y_test, singleton_data[unique_types]])

# Initialize and train the decision tree classifier
model = DecisionTreeClassifier(random_state=42)
model.fit(X_train, y_train)

# Predict labels for the test set
y_pred = model.predict(X_test)

# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred)
print("Score: ", model.score(X_test, y_test))
print("Accuracy:", accuracy)
print(classification_report(y_test, y_pred))

Score:  0.49173553719008267
Accuracy: 0.49173553719008267
              precision    recall  f1-score   support

           0       0.94      0.89      0.92        19
           1       0.60      0.50      0.55        18
           2       0.71      0.79      0.75        19
           3       0.63      0.63      0.63        19
           4       0.54      0.82      0.65        17
           5       0.79      0.58      0.67        19
           6       0.76      0.83      0.79        23
           7       0.66      0.68      0.67        28
           8       0.68      0.68      0.68        19
           9       1.00      0.72      0.84        29
          10       0.52      0.55      0.54        20
          11       0.29      0.38      0.33        13
          12       0.69      0.69      0.69       101
          13       0.74      0.74      0.74        27
          14       0.62      0.56      0.59        18
          15       0.50      0.50      0.50        26
          16       0.80      0.53      0.64        15
          17       0.43      0.53      0.47        17
          18       0.84      0.73      0.78        37

   micro avg       0.68      0.67      0.67       484
   macro avg       0.67      0.65      0.65       484
weighted avg       0.69      0.67      0.68       484
 samples avg       0.68      0.67      0.67       484

pipeline = make_pipeline(StandardScaler(), DecisionTreeClassifier())

param_dist = {
    "decisiontreeclassifier__max_depth": [15, 30, None],  
    "decisiontreeclassifier__min_samples_leaf": np.arange(1, 10)
}

# Instantiate the GridSearchCV object
grid_search_cv = GridSearchCV(pipeline, param_grid=param_dist, cv=5)

# Fit grid_search_cv using the data X and labels y.
grid_search_cv.fit(X_train, y_train) 
y_pred = grid_search_cv.predict(X_test)

# Print the best score
print("Tuned Model Parameters: {}".format(grid_search_cv.best_params_))
print("Accuracy: {}".format(grid_search_cv.best_estimator_.score(X_test, y_test)))
print(classification_report(y_test, y_pred))

Tuned Model Parameters: {'decisiontreeclassifier__max_depth': None, 'decisiontreeclassifier__min_samples_leaf': 1}
Accuracy: 0.5165289256198347
              precision    recall  f1-score   support

           0       0.94      0.89      0.92        19
           1       0.52      0.61      0.56        18
           2       0.74      0.74      0.74        19
           3       0.60      0.63      0.62        19
           4       0.60      0.88      0.71        17
           5       0.79      0.58      0.67        19
           6       0.76      0.83      0.79        23
           7       0.66      0.68      0.67        28
           8       0.75      0.79      0.77        19
           9       0.92      0.76      0.83        29
          10       0.73      0.55      0.63        20
          11       0.50      0.46      0.48        13
          12       0.70      0.68      0.69       101
          13       0.70      0.78      0.74        27
          14       0.63      0.67      0.65        18
          15       0.48      0.38      0.43        26
          16       0.64      0.60      0.62        15
          17       0.45      0.53      0.49        17
          18       0.79      0.73      0.76        37

   micro avg       0.69      0.68      0.68       484
   macro avg       0.68      0.67      0.67       484
weighted avg       0.69      0.68      0.68       484
 samples avg       0.69      0.68      0.68       484

from sklearn.ensemble import RandomForestClassifier

model = RandomForestClassifier(random_state=42)

model.fit(X_train, y_train)

# Predict labels for the test set
y_pred = model.predict(X_test)

# Calculate accuracy
print("Score: ", model.score(X_test, y_test))
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)
print(classification_report(y_test, y_pred))

Score:  0.29338842975206614
Accuracy: 0.29338842975206614
              precision    recall  f1-score   support

           0       1.00      0.89      0.94        19
           1       0.80      0.22      0.35        18
           2       1.00      0.58      0.73        19
           3       1.00      0.37      0.54        19
           4       1.00      0.41      0.58        17
           5       1.00      0.42      0.59        19
           6       1.00      0.61      0.76        23
           7       1.00      0.46      0.63        28
           8       1.00      0.58      0.73        19
           9       1.00      0.69      0.82        29
          10       1.00      0.30      0.46        20
          11       1.00      0.31      0.47        13
          12       0.77      0.73      0.75       101
          13       0.82      0.33      0.47        27
          14       1.00      0.22      0.36        18
          15       1.00      0.35      0.51        26
          16       1.00      0.33      0.50        15
          17       1.00      0.29      0.45        17
          18       1.00      0.51      0.68        37

   micro avg       0.91      0.51      0.65       484
   macro avg       0.97      0.45      0.60       484
weighted avg       0.93      0.51      0.63       484
 samples avg       0.69      0.51      0.57       484

C:\Users\thors\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\LocalCache\local-packages\Python310\site-packages\sklearn\metrics\_classification.py:1497: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in samples with no predicted labels. Use `zero_division` parameter to control this behavior.
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))

from sklearn.model_selection import RandomizedSearchCV
from scipy.stats import randint  
# Define the parameter grid
pipeline = make_pipeline(StandardScaler(), RandomForestClassifier())

# Setup the parameters and distributions to sample from: param_dist
param_dist = {
    "randomforestclassifier__max_depth": [15, 30, None],  
    "randomforestclassifier__min_samples_leaf": np.arange(1, 10, 4),
    "randomforestclassifier__n_estimators": np.arange(120, 140, 4)
}

# Instantiate the RandomizedSearchCV object: random_grid_search_cv
random_search_cv = RandomizedSearchCV(pipeline, param_distributions=param_dist, n_iter=50, cv=3, random_state=42)
#grid_search_cv = GridSearchCV(pipeline, param_grid=param_dist, cv=3)

# Fit random_search_cv using the data X and labels y
random_search_cv.fit(X_train, y_train)
#grid_search_cv.fit(X_train, y_train)

# Print the best score
print("Best score is {}".format(random_search_cv.best_estimator_.score(X_test, y_test)))
print("Best parameters are {}".format(random_search_cv.best_params_))
print(classification_report(y_test, y_pred))

Best score is 0.3305785123966942
Best parameters are {'randomforestclassifier__max_depth': None, 'randomforestclassifier__min_samples_leaf': 1, 'randomforestclassifier__n_estimators': 120}

from sklearn.neighbors import KNeighborsClassifier
model = KNeighborsClassifier()
model.fit(X_train, y_train)

y_pred = model.predict(X_test)

# Calculate accuracy
print("Score: ", model.score(X_test, y_test))
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)
print(classification_report(y_test, y_pred))

Score:  0.02066115702479339
Accuracy: 0.02066115702479339
              precision    recall  f1-score   support

           0       0.50      0.16      0.24        19
           1       0.00      0.00      0.00        18
           2       0.67      0.32      0.43        19
           3       1.00      0.21      0.35        19
           4       1.00      0.18      0.30        17
           5       0.67      0.11      0.18        19
           6       0.40      0.09      0.14        23
           7       0.38      0.18      0.24        28
           8       0.00      0.00      0.00        19
           9       0.40      0.07      0.12        29
          10       0.50      0.10      0.17        20
          11       0.33      0.08      0.12        13
          12       0.48      0.46      0.47       101
          13       0.67      0.37      0.48        27
          14       0.00      0.00      0.00        18
          15       1.00      0.08      0.14        26
          16       0.00      0.00      0.00        15
          17       0.00      0.00      0.00        17
          18       0.24      0.11      0.15        37

   micro avg       0.49      0.19      0.27       484
   macro avg       0.43      0.13      0.19       484
weighted avg       0.45      0.19      0.24       484
 samples avg       0.32      0.19      0.23       484

C:\Users\thors\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\LocalCache\local-packages\Python310\site-packages\sklearn\metrics\_classification.py:1497: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
C:\Users\thors\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\LocalCache\local-packages\Python310\site-packages\sklearn\metrics\_classification.py:1497: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in samples with no predicted labels. Use `zero_division` parameter to control this behavior.
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))

param_grid = {
    'kneighborsclassifier__n_neighbors': [3, 5, 7, 9]  # List of k values to try
}

pipeline = make_pipeline(StandardScaler(), KNeighborsClassifier())

grid_search = GridSearchCV(estimator=pipeline, param_grid=param_grid, cv=5)

grid_search.fit(X_train, y_train)

best_params = grid_search.best_params_
best_score=grid_search.best_estimator_.score(X_test, y_test)

print("Best Parameters:", best_params)
print("Best Score:", best_score)
print(classification_report(y_test, y_pred))

Best Parameters: {'kneighborsclassifier__n_neighbors': 3}
Best Score: 0.2727272727272727
              precision    recall  f1-score   support

           0       0.50      0.16      0.24        19
           1       0.00      0.00      0.00        18
           2       0.67      0.32      0.43        19
           3       1.00      0.21      0.35        19
           4       1.00      0.18      0.30        17
           5       0.67      0.11      0.18        19
           6       0.40      0.09      0.14        23
           7       0.38      0.18      0.24        28
           8       0.00      0.00      0.00        19
           9       0.40      0.07      0.12        29
          10       0.50      0.10      0.17        20
          11       0.33      0.08      0.12        13
          12       0.48      0.46      0.47       101
          13       0.67      0.37      0.48        27
          14       0.00      0.00      0.00        18
          15       1.00      0.08      0.14        26
          16       0.00      0.00      0.00        15
          17       0.00      0.00      0.00        17
          18       0.24      0.11      0.15        37

   micro avg       0.49      0.19      0.27       484
   macro avg       0.43      0.13      0.19       484
weighted avg       0.45      0.19      0.24       484
 samples avg       0.32      0.19      0.23       484

C:\Users\thors\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\LocalCache\local-packages\Python310\site-packages\sklearn\metrics\_classification.py:1497: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
C:\Users\thors\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\LocalCache\local-packages\Python310\site-packages\sklearn\metrics\_classification.py:1497: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in samples with no predicted labels. Use `zero_division` parameter to control this behavior.
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))

df = preprocessed_df.copy()

df['Types'] = df[['type_1', 'type_2']].apply(lambda x: sorted(tuple(filter(lambda y: pd.notna(y), x))), axis=1)

df['Types'] = df['Types'].astype(str)

# drop the Type 1 and Type 2 columns
df.drop(['type_1', 'type_2'], axis=1, inplace=True)
# print head
df.head()

# Find classes with only one type
singleton_classes = df['Types'].value_counts()[df['Types'].value_counts() == 1].index.tolist()

# Create binary labels for each Pokémon type combination
unique_type_combinations = df['Types'].unique()
for type_combination in unique_type_combinations:
    df[type_combination] = df['Types'].apply(lambda x: 1 if type in x else 0)

singleton_data = df[df['Types'].isin(singleton_classes)]
other_data = df[~df['Types'].isin(singleton_classes)]

print("Number of singleton classes",len(singleton_classes))
print("number of unique type combinations",len(df['Types'].unique()))
print(len(df['Types']))
df.head()

Number of singleton classes 23
number of unique type combinations 154
1044

C:\Users\thors\AppData\Local\Temp\ipykernel_20284\1134825012.py:4: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df[type_combination] = df['Types'].apply(lambda x: 1 if type in x else 0)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\1134825012.py:4: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df[type_combination] = df['Types'].apply(lambda x: 1 if type in x else 0)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\1134825012.py:4: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df[type_combination] = df['Types'].apply(lambda x: 1 if type in x else 0)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\1134825012.py:4: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df[type_combination] = df['Types'].apply(lambda x: 1 if type in x else 0)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\1134825012.py:4: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df[type_combination] = df['Types'].apply(lambda x: 1 if type in x else 0)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\1134825012.py:4: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df[type_combination] = df['Types'].apply(lambda x: 1 if type in x else 0)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\1134825012.py:4: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df[type_combination] = df['Types'].apply(lambda x: 1 if type in x else 0)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\1134825012.py:4: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df[type_combination] = df['Types'].apply(lambda x: 1 if type in x else 0)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\1134825012.py:4: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df[type_combination] = df['Types'].apply(lambda x: 1 if type in x else 0)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\1134825012.py:4: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df[type_combination] = df['Types'].apply(lambda x: 1 if type in x else 0)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\1134825012.py:4: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df[type_combination] = df['Types'].apply(lambda x: 1 if type in x else 0)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\1134825012.py:4: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df[type_combination] = df['Types'].apply(lambda x: 1 if type in x else 0)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\1134825012.py:4: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df[type_combination] = df['Types'].apply(lambda x: 1 if type in x else 0)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\1134825012.py:4: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df[type_combination] = df['Types'].apply(lambda x: 1 if type in x else 0)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\1134825012.py:4: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df[type_combination] = df['Types'].apply(lambda x: 1 if type in x else 0)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\1134825012.py:4: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df[type_combination] = df['Types'].apply(lambda x: 1 if type in x else 0)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\1134825012.py:4: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df[type_combination] = df['Types'].apply(lambda x: 1 if type in x else 0)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\1134825012.py:4: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df[type_combination] = df['Types'].apply(lambda x: 1 if type in x else 0)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\1134825012.py:4: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df[type_combination] = df['Types'].apply(lambda x: 1 if type in x else 0)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\1134825012.py:4: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df[type_combination] = df['Types'].apply(lambda x: 1 if type in x else 0)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\1134825012.py:4: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df[type_combination] = df['Types'].apply(lambda x: 1 if type in x else 0)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\1134825012.py:4: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df[type_combination] = df['Types'].apply(lambda x: 1 if type in x else 0)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\1134825012.py:4: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df[type_combination] = df['Types'].apply(lambda x: 1 if type in x else 0)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\1134825012.py:4: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df[type_combination] = df['Types'].apply(lambda x: 1 if type in x else 0)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\1134825012.py:4: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df[type_combination] = df['Types'].apply(lambda x: 1 if type in x else 0)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\1134825012.py:4: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df[type_combination] = df['Types'].apply(lambda x: 1 if type in x else 0)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\1134825012.py:4: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df[type_combination] = df['Types'].apply(lambda x: 1 if type in x else 0)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\1134825012.py:4: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df[type_combination] = df['Types'].apply(lambda x: 1 if type in x else 0)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\1134825012.py:4: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df[type_combination] = df['Types'].apply(lambda x: 1 if type in x else 0)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\1134825012.py:4: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df[type_combination] = df['Types'].apply(lambda x: 1 if type in x else 0)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\1134825012.py:4: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df[type_combination] = df['Types'].apply(lambda x: 1 if type in x else 0)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\1134825012.py:4: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df[type_combination] = df['Types'].apply(lambda x: 1 if type in x else 0)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\1134825012.py:4: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df[type_combination] = df['Types'].apply(lambda x: 1 if type in x else 0)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\1134825012.py:4: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df[type_combination] = df['Types'].apply(lambda x: 1 if type in x else 0)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\1134825012.py:4: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df[type_combination] = df['Types'].apply(lambda x: 1 if type in x else 0)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\1134825012.py:4: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df[type_combination] = df['Types'].apply(lambda x: 1 if type in x else 0)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\1134825012.py:4: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df[type_combination] = df['Types'].apply(lambda x: 1 if type in x else 0)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\1134825012.py:4: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df[type_combination] = df['Types'].apply(lambda x: 1 if type in x else 0)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\1134825012.py:4: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df[type_combination] = df['Types'].apply(lambda x: 1 if type in x else 0)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\1134825012.py:4: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df[type_combination] = df['Types'].apply(lambda x: 1 if type in x else 0)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\1134825012.py:4: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df[type_combination] = df['Types'].apply(lambda x: 1 if type in x else 0)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\1134825012.py:4: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df[type_combination] = df['Types'].apply(lambda x: 1 if type in x else 0)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\1134825012.py:4: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df[type_combination] = df['Types'].apply(lambda x: 1 if type in x else 0)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\1134825012.py:4: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df[type_combination] = df['Types'].apply(lambda x: 1 if type in x else 0)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\1134825012.py:4: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df[type_combination] = df['Types'].apply(lambda x: 1 if type in x else 0)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\1134825012.py:4: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df[type_combination] = df['Types'].apply(lambda x: 1 if type in x else 0)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\1134825012.py:4: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df[type_combination] = df['Types'].apply(lambda x: 1 if type in x else 0)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\1134825012.py:4: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df[type_combination] = df['Types'].apply(lambda x: 1 if type in x else 0)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\1134825012.py:4: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df[type_combination] = df['Types'].apply(lambda x: 1 if type in x else 0)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\1134825012.py:4: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df[type_combination] = df['Types'].apply(lambda x: 1 if type in x else 0)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\1134825012.py:4: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df[type_combination] = df['Types'].apply(lambda x: 1 if type in x else 0)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\1134825012.py:4: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df[type_combination] = df['Types'].apply(lambda x: 1 if type in x else 0)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\1134825012.py:4: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df[type_combination] = df['Types'].apply(lambda x: 1 if type in x else 0)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\1134825012.py:4: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df[type_combination] = df['Types'].apply(lambda x: 1 if type in x else 0)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\1134825012.py:4: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df[type_combination] = df['Types'].apply(lambda x: 1 if type in x else 0)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\1134825012.py:4: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df[type_combination] = df['Types'].apply(lambda x: 1 if type in x else 0)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\1134825012.py:4: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df[type_combination] = df['Types'].apply(lambda x: 1 if type in x else 0)
C:\Users\thors\AppData\Local\Temp\ipykernel_20284\1134825012.py:4: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df[type_combination] = df['Types'].apply(lambda x: 1 if type in x else 0)

# Drop the 'Types' column
df = df.drop(columns=['Types'])
other_data.drop(columns=['Types'], inplace=True)
singleton_data.drop(columns=['Types'], inplace=True)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(other_data.drop(columns=unique_type_combinations), other_data[unique_type_combinations], test_size=0.2, stratify=other_data[unique_type_combinations], random_state=42)
X_train = pd.concat([X_train, singleton_data.drop(columns=unique_type_combinations)])
y_train = pd.concat([y_train, singleton_data[unique_type_combinations]])
X_test = pd.concat([X_test, singleton_data.drop(columns=unique_type_combinations)])
y_test = pd.concat([y_test, singleton_data[unique_type_combinations]])


# Initialize and train the decision tree classifier
model = DecisionTreeClassifier(random_state=42)
model.fit(X_train, y_train)

# Predict labels for the test set
y_pred = model.predict(X_test)

# Calculate accuracy
print("Score: ", model.score(X_test, y_test))
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)
print(classification_report(y_test, y_pred))

Score:  0.9692982456140351
Accuracy: 0.9692982456140351
              precision    recall  f1-score   support

           0       0.88      0.91      0.90        33
           1       0.88      0.91      0.90        33
           2       0.88      0.91      0.90        33
           3       0.88      0.91      0.90        33
           4       0.88      0.91      0.90        33
           5       0.88      0.91      0.90        33
           6       0.88      0.91      0.90        33
           7       0.88      0.91      0.90        33
           8       0.88      0.91      0.90        33
           9       0.88      0.91      0.90        33
          10       0.88      0.91      0.90        33
          11       0.88      0.91      0.90        33
          12       0.88      0.91      0.90        33
          13       0.88      0.91      0.90        33
          14       0.88      0.91      0.90        33
          15       0.88      0.91      0.90        33
          16       0.88      0.91      0.90        33
          17       0.88      0.91      0.90        33
          18       0.88      0.91      0.90        33
          19       0.88      0.91      0.90        33
          20       0.88      0.91      0.90        33
          21       0.88      0.91      0.90        33
          22       0.88      0.91      0.90        33
          23       0.88      0.91      0.90        33
          24       0.88      0.91      0.90        33
          25       0.88      0.91      0.90        33
          26       0.88      0.91      0.90        33
          27       0.88      0.91      0.90        33
          28       0.88      0.91      0.90        33
          29       0.88      0.91      0.90        33
          30       0.88      0.91      0.90        33
          31       0.88      0.91      0.90        33
          32       0.88      0.91      0.90        33
          33       0.88      0.91      0.90        33
          34       0.88      0.91      0.90        33
          35       0.88      0.91      0.90        33
          36       0.88      0.91      0.90        33
          37       0.88      0.91      0.90        33
          38       0.88      0.91      0.90        33
          39       0.88      0.91      0.90        33
          40       0.88      0.91      0.90        33
          41       0.88      0.91      0.90        33
          42       0.88      0.91      0.90        33
          43       0.88      0.91      0.90        33
          44       0.88      0.91      0.90        33
          45       0.88      0.91      0.90        33
          46       0.88      0.91      0.90        33
          47       0.88      0.91      0.90        33
          48       0.88      0.91      0.90        33
          49       0.88      0.91      0.90        33
          50       0.88      0.91      0.90        33
          51       0.88      0.91      0.90        33
          52       0.88      0.91      0.90        33
          53       0.88      0.91      0.90        33
          54       0.88      0.91      0.90        33
          55       0.88      0.91      0.90        33
          56       0.88      0.91      0.90        33
          57       0.88      0.91      0.90        33
          58       0.88      0.91      0.90        33
          59       0.88      0.91      0.90        33
          60       0.88      0.91      0.90        33
          61       0.88      0.91      0.90        33
          62       0.88      0.91      0.90        33
          63       0.88      0.91      0.90        33
          64       0.88      0.91      0.90        33
          65       0.88      0.91      0.90        33
          66       0.88      0.91      0.90        33
          67       0.88      0.91      0.90        33
          68       0.88      0.91      0.90        33
          69       0.88      0.91      0.90        33
          70       0.88      0.91      0.90        33
          71       0.88      0.91      0.90        33
          72       0.88      0.91      0.90        33
          73       0.88      0.91      0.90        33
          74       0.88      0.91      0.90        33
          75       0.88      0.91      0.90        33
          76       0.88      0.91      0.90        33
          77       0.88      0.91      0.90        33
          78       0.88      0.91      0.90        33
          79       0.88      0.91      0.90        33
          80       0.88      0.91      0.90        33
          81       0.88      0.91      0.90        33
          82       0.88      0.91      0.90        33
          83       0.88      0.91      0.90        33
          84       0.88      0.91      0.90        33
          85       0.88      0.91      0.90        33
          86       0.88      0.91      0.90        33
          87       0.88      0.91      0.90        33
          88       0.88      0.91      0.90        33
          89       0.88      0.91      0.90        33
          90       0.88      0.91      0.90        33
          91       0.88      0.91      0.90        33
          92       0.88      0.91      0.90        33
          93       0.88      0.91      0.90        33
          94       0.88      0.91      0.90        33
          95       0.88      0.91      0.90        33
          96       0.88      0.91      0.90        33
          97       0.88      0.91      0.90        33
          98       0.88      0.91      0.90        33
          99       0.88      0.91      0.90        33
         100       0.88      0.91      0.90        33
         101       0.88      0.91      0.90        33
         102       0.88      0.91      0.90        33
         103       0.88      0.91      0.90        33
         104       0.88      0.91      0.90        33
         105       0.88      0.91      0.90        33
         106       0.88      0.91      0.90        33
         107       0.88      0.91      0.90        33
         108       0.88      0.91      0.90        33
         109       0.88      0.91      0.90        33
         110       0.88      0.91      0.90        33
         111       0.88      0.91      0.90        33
         112       0.88      0.91      0.90        33
         113       0.88      0.91      0.90        33
         114       0.88      0.91      0.90        33
         115       0.88      0.91      0.90        33
         116       0.88      0.91      0.90        33
         117       0.88      0.91      0.90        33
         118       0.88      0.91      0.90        33
         119       0.88      0.91      0.90        33
         120       0.88      0.91      0.90        33
         121       0.88      0.91      0.90        33
         122       0.88      0.91      0.90        33
         123       0.88      0.91      0.90        33
         124       0.88      0.91      0.90        33
         125       0.88      0.91      0.90        33
         126       0.88      0.91      0.90        33
         127       0.88      0.91      0.90        33
         128       0.88      0.91      0.90        33
         129       0.88      0.91      0.90        33
         130       0.88      0.91      0.90        33
         131       0.88      0.91      0.90        33
         132       0.88      0.91      0.90        33
         133       0.88      0.91      0.90        33
         134       0.88      0.91      0.90        33
         135       0.88      0.91      0.90        33
         136       0.88      0.91      0.90        33
         137       0.88      0.91      0.90        33
         138       0.88      0.91      0.90        33
         139       0.88      0.91      0.90        33
         140       0.88      0.91      0.90        33
         141       0.88      0.91      0.90        33
         142       0.88      0.91      0.90        33
         143       0.88      0.91      0.90        33
         144       0.88      0.91      0.90        33
         145       0.88      0.91      0.90        33
         146       0.88      0.91      0.90        33
         147       0.88      0.91      0.90        33
         148       0.88      0.91      0.90        33
         149       0.88      0.91      0.90        33
         150       0.88      0.91      0.90        33
         151       0.88      0.91      0.90        33
         152       0.88      0.91      0.90        33
         153       0.88      0.91      0.90        33

   micro avg       0.88      0.91      0.90      5082
   macro avg       0.88      0.91      0.90      5082
weighted avg       0.88      0.91      0.90      5082
 samples avg       0.13      0.13      0.13      5082

C:\Users\thors\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\LocalCache\local-packages\Python310\site-packages\sklearn\metrics\_classification.py:1497: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in samples with no predicted labels. Use `zero_division` parameter to control this behavior.
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
C:\Users\thors\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\LocalCache\local-packages\Python310\site-packages\sklearn\metrics\_classification.py:1497: UndefinedMetricWarning: Recall is ill-defined and being set to 0.0 in samples with no true labels. Use `zero_division` parameter to control this behavior.
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
C:\Users\thors\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\LocalCache\local-packages\Python310\site-packages\sklearn\metrics\_classification.py:1497: UndefinedMetricWarning: F-score is ill-defined and being set to 0.0 in samples with no true nor predicted labels. Use `zero_division` parameter to control this behavior.
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))

pipeline = make_pipeline(StandardScaler(), DecisionTreeClassifier())

param_dist = {
    "decisiontreeclassifier__max_depth": [15, 30, None],  
    "decisiontreeclassifier__min_samples_leaf": np.arange(1, 10)
}

# Instantiate the GridSearchCV object
grid_search_cv = GridSearchCV(pipeline, param_grid=param_dist, cv=5)

# Fit grid_search_cv using the data X and labels y.
grid_search_cv.fit(X_train, y_train) 
y_pred = grid_search_cv.predict(X_test)

# Print the best score
print("Tuned Model Parameters: {}".format(grid_search_cv.best_params_))
print("Accuracy: {}".format(grid_search_cv.best_estimator_.score(X_test, y_test)))
print(classification_report(y_test, y_pred))

Tuned Model Parameters: {'decisiontreeclassifier__max_depth': 30, 'decisiontreeclassifier__min_samples_leaf': 1}
Accuracy: 0.9692982456140351
              precision    recall  f1-score   support

           0       0.86      0.94      0.90        33
           1       0.86      0.94      0.90        33
           2       0.86      0.94      0.90        33
           3       0.86      0.94      0.90        33
           4       0.86      0.94      0.90        33
           5       0.86      0.94      0.90        33
           6       0.86      0.94      0.90        33
           7       0.86      0.94      0.90        33
           8       0.86      0.94      0.90        33
           9       0.86      0.94      0.90        33
          10       0.86      0.94      0.90        33
          11       0.86      0.94      0.90        33
          12       0.86      0.94      0.90        33
          13       0.86      0.94      0.90        33
          14       0.86      0.94      0.90        33
          15       0.86      0.94      0.90        33
          16       0.86      0.94      0.90        33
          17       0.86      0.94      0.90        33
          18       0.86      0.94      0.90        33
          19       0.86      0.94      0.90        33
          20       0.86      0.94      0.90        33
          21       0.86      0.94      0.90        33
          22       0.86      0.94      0.90        33
          23       0.86      0.94      0.90        33
          24       0.86      0.94      0.90        33
          25       0.86      0.94      0.90        33
          26       0.86      0.94      0.90        33
          27       0.86      0.94      0.90        33
          28       0.86      0.94      0.90        33
          29       0.86      0.94      0.90        33
          30       0.86      0.94      0.90        33
          31       0.86      0.94      0.90        33
          32       0.86      0.94      0.90        33
          33       0.86      0.94      0.90        33
          34       0.86      0.94      0.90        33
          35       0.86      0.94      0.90        33
          36       0.86      0.94      0.90        33
          37       0.86      0.94      0.90        33
          38       0.86      0.94      0.90        33
          39       0.86      0.94      0.90        33
          40       0.86      0.94      0.90        33
          41       0.86      0.94      0.90        33
          42       0.86      0.94      0.90        33
          43       0.86      0.94      0.90        33
          44       0.86      0.94      0.90        33
          45       0.86      0.94      0.90        33
          46       0.86      0.94      0.90        33
          47       0.86      0.94      0.90        33
          48       0.86      0.94      0.90        33
          49       0.86      0.94      0.90        33
          50       0.86      0.94      0.90        33
          51       0.86      0.94      0.90        33
          52       0.86      0.94      0.90        33
          53       0.86      0.94      0.90        33
          54       0.86      0.94      0.90        33
          55       0.86      0.94      0.90        33
          56       0.86      0.94      0.90        33
          57       0.86      0.94      0.90        33
          58       0.86      0.94      0.90        33
          59       0.86      0.94      0.90        33
          60       0.86      0.94      0.90        33
          61       0.86      0.94      0.90        33
          62       0.86      0.94      0.90        33
          63       0.86      0.94      0.90        33
          64       0.86      0.94      0.90        33
          65       0.86      0.94      0.90        33
          66       0.86      0.94      0.90        33
          67       0.86      0.94      0.90        33
          68       0.86      0.94      0.90        33
          69       0.86      0.94      0.90        33
          70       0.86      0.94      0.90        33
          71       0.86      0.94      0.90        33
          72       0.86      0.94      0.90        33
          73       0.86      0.94      0.90        33
          74       0.86      0.94      0.90        33
          75       0.86      0.94      0.90        33
          76       0.86      0.94      0.90        33
          77       0.86      0.94      0.90        33
          78       0.86      0.94      0.90        33
          79       0.86      0.94      0.90        33
          80       0.86      0.94      0.90        33
          81       0.86      0.94      0.90        33
          82       0.86      0.94      0.90        33
          83       0.86      0.94      0.90        33
          84       0.86      0.94      0.90        33
          85       0.86      0.94      0.90        33
          86       0.86      0.94      0.90        33
          87       0.86      0.94      0.90        33
          88       0.86      0.94      0.90        33
          89       0.86      0.94      0.90        33
          90       0.86      0.94      0.90        33
          91       0.86      0.94      0.90        33
          92       0.86      0.94      0.90        33
          93       0.86      0.94      0.90        33
          94       0.86      0.94      0.90        33
          95       0.86      0.94      0.90        33
          96       0.86      0.94      0.90        33
          97       0.86      0.94      0.90        33
          98       0.86      0.94      0.90        33
          99       0.86      0.94      0.90        33
         100       0.86      0.94      0.90        33
         101       0.86      0.94      0.90        33
         102       0.86      0.94      0.90        33
         103       0.86      0.94      0.90        33
         104       0.86      0.94      0.90        33
         105       0.86      0.94      0.90        33
         106       0.86      0.94      0.90        33
         107       0.86      0.94      0.90        33
         108       0.86      0.94      0.90        33
         109       0.86      0.94      0.90        33
         110       0.86      0.94      0.90        33
         111       0.86      0.94      0.90        33
         112       0.86      0.94      0.90        33
         113       0.86      0.94      0.90        33
         114       0.86      0.94      0.90        33
         115       0.86      0.94      0.90        33
         116       0.86      0.94      0.90        33
         117       0.86      0.94      0.90        33
         118       0.86      0.94      0.90        33
         119       0.86      0.94      0.90        33
         120       0.86      0.94      0.90        33
         121       0.86      0.94      0.90        33
         122       0.86      0.94      0.90        33
         123       0.86      0.94      0.90        33
         124       0.86      0.94      0.90        33
         125       0.86      0.94      0.90        33
         126       0.86      0.94      0.90        33
         127       0.86      0.94      0.90        33
         128       0.86      0.94      0.90        33
         129       0.86      0.94      0.90        33
         130       0.86      0.94      0.90        33
         131       0.86      0.94      0.90        33
         132       0.86      0.94      0.90        33
         133       0.86      0.94      0.90        33
         134       0.86      0.94      0.90        33
         135       0.86      0.94      0.90        33
         136       0.86      0.94      0.90        33
         137       0.86      0.94      0.90        33
         138       0.86      0.94      0.90        33
         139       0.86      0.94      0.90        33
         140       0.86      0.94      0.90        33
         141       0.86      0.94      0.90        33
         142       0.86      0.94      0.90        33
         143       0.86      0.94      0.90        33
         144       0.86      0.94      0.90        33
         145       0.86      0.94      0.90        33
         146       0.86      0.94      0.90        33
         147       0.86      0.94      0.90        33
         148       0.86      0.94      0.90        33
         149       0.86      0.94      0.90        33
         150       0.86      0.94      0.90        33
         151       0.86      0.94      0.90        33
         152       0.86      0.94      0.90        33
         153       0.86      0.94      0.90        33

   micro avg       0.86      0.94      0.90      5082
   macro avg       0.86      0.94      0.90      5082
weighted avg       0.86      0.94      0.90      5082
 samples avg       0.14      0.14      0.14      5082

C:\Users\thors\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\LocalCache\local-packages\Python310\site-packages\sklearn\metrics\_classification.py:1497: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in samples with no predicted labels. Use `zero_division` parameter to control this behavior.
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
C:\Users\thors\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\LocalCache\local-packages\Python310\site-packages\sklearn\metrics\_classification.py:1497: UndefinedMetricWarning: Recall is ill-defined and being set to 0.0 in samples with no true labels. Use `zero_division` parameter to control this behavior.
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
C:\Users\thors\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\LocalCache\local-packages\Python310\site-packages\sklearn\metrics\_classification.py:1497: UndefinedMetricWarning: F-score is ill-defined and being set to 0.0 in samples with no true nor predicted labels. Use `zero_division` parameter to control this behavior.
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))

# Initialize and train the decision tree classifier
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)

# Predict labels for the test set
y_pred = model.predict(X_test)

# Calculate accuracy
print("Score: ", model.score(X_test,y_test))
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)
print(classification_report(y_test, y_pred))

Score:  0.9385964912280702
Accuracy: 0.9385964912280702
              precision    recall  f1-score   support

           0       0.95      0.61      0.74        33
           1       0.95      0.61      0.74        33
           2       0.95      0.61      0.74        33
           3       0.95      0.61      0.74        33
           4       0.95      0.61      0.74        33
           5       0.95      0.61      0.74        33
           6       0.95      0.61      0.74        33
           7       0.95      0.61      0.74        33
           8       0.95      0.61      0.74        33
           9       0.95      0.61      0.74        33
          10       0.95      0.61      0.74        33
          11       0.95      0.61      0.74        33
          12       0.95      0.61      0.74        33
          13       0.95      0.61      0.74        33
          14       0.95      0.61      0.74        33
          15       0.95      0.61      0.74        33
          16       0.95      0.61      0.74        33
          17       0.95      0.61      0.74        33
          18       0.95      0.61      0.74        33
          19       0.95      0.61      0.74        33
          20       0.95      0.61      0.74        33
          21       0.95      0.61      0.74        33
          22       0.95      0.61      0.74        33
          23       0.95      0.61      0.74        33
          24       0.95      0.61      0.74        33
          25       0.95      0.61      0.74        33
          26       0.95      0.61      0.74        33
          27       0.95      0.61      0.74        33
          28       0.95      0.61      0.74        33
          29       0.95      0.61      0.74        33
          30       0.95      0.61      0.74        33
          31       0.95      0.61      0.74        33
          32       0.95      0.61      0.74        33
          33       0.95      0.61      0.74        33
          34       0.95      0.61      0.74        33
          35       0.95      0.61      0.74        33
          36       0.95      0.61      0.74        33
          37       0.95      0.61      0.74        33
          38       0.95      0.61      0.74        33
          39       0.95      0.61      0.74        33
          40       0.95      0.61      0.74        33
          41       0.95      0.61      0.74        33
          42       0.95      0.61      0.74        33
          43       0.95      0.61      0.74        33
          44       0.95      0.61      0.74        33
          45       0.95      0.61      0.74        33
          46       0.95      0.61      0.74        33
          47       0.95      0.61      0.74        33
          48       0.95      0.61      0.74        33
          49       0.95      0.61      0.74        33
          50       0.95      0.61      0.74        33
          51       0.95      0.61      0.74        33
          52       0.95      0.61      0.74        33
          53       0.95      0.61      0.74        33
          54       0.95      0.61      0.74        33
          55       0.95      0.61      0.74        33
          56       0.95      0.61      0.74        33
          57       0.95      0.61      0.74        33
          58       0.95      0.61      0.74        33
          59       0.95      0.61      0.74        33
          60       0.95      0.61      0.74        33
          61       0.95      0.61      0.74        33
          62       0.95      0.61      0.74        33
          63       0.95      0.61      0.74        33
          64       0.95      0.61      0.74        33
          65       0.95      0.61      0.74        33
          66       0.95      0.61      0.74        33
          67       0.95      0.61      0.74        33
          68       0.95      0.61      0.74        33
          69       0.95      0.61      0.74        33
          70       0.95      0.61      0.74        33
          71       0.95      0.61      0.74        33
          72       0.95      0.61      0.74        33
          73       0.95      0.61      0.74        33
          74       0.95      0.61      0.74        33
          75       0.95      0.61      0.74        33
          76       0.95      0.61      0.74        33
          77       0.95      0.61      0.74        33
          78       0.95      0.61      0.74        33
          79       0.95      0.61      0.74        33
          80       0.95      0.61      0.74        33
          81       0.95      0.61      0.74        33
          82       0.95      0.61      0.74        33
          83       0.95      0.61      0.74        33
          84       0.95      0.61      0.74        33
          85       0.95      0.61      0.74        33
          86       0.95      0.61      0.74        33
          87       0.95      0.61      0.74        33
          88       0.95      0.61      0.74        33
          89       0.95      0.61      0.74        33
          90       0.95      0.61      0.74        33
          91       0.95      0.61      0.74        33
          92       0.95      0.61      0.74        33
          93       0.95      0.61      0.74        33
          94       0.95      0.61      0.74        33
          95       0.95      0.61      0.74        33
          96       0.95      0.61      0.74        33
          97       0.95      0.61      0.74        33
          98       0.95      0.61      0.74        33
          99       0.95      0.61      0.74        33
         100       0.95      0.61      0.74        33
         101       0.95      0.61      0.74        33
         102       0.95      0.61      0.74        33
         103       0.95      0.61      0.74        33
         104       0.95      0.61      0.74        33
         105       0.95      0.61      0.74        33
         106       0.95      0.61      0.74        33
         107       0.95      0.61      0.74        33
         108       0.95      0.61      0.74        33
         109       0.95      0.61      0.74        33
         110       0.95      0.61      0.74        33
         111       0.95      0.61      0.74        33
         112       0.95      0.61      0.74        33
         113       0.95      0.61      0.74        33
         114       0.95      0.61      0.74        33
         115       0.95      0.61      0.74        33
         116       0.95      0.61      0.74        33
         117       0.95      0.61      0.74        33
         118       0.95      0.61      0.74        33
         119       0.95      0.61      0.74        33
         120       0.95      0.61      0.74        33
         121       0.95      0.61      0.74        33
         122       0.95      0.61      0.74        33
         123       0.95      0.61      0.74        33
         124       0.95      0.61      0.74        33
         125       0.95      0.61      0.74        33
         126       0.95      0.61      0.74        33
         127       0.95      0.61      0.74        33
         128       0.95      0.61      0.74        33
         129       0.95      0.61      0.74        33
         130       0.95      0.61      0.74        33
         131       0.95      0.61      0.74        33
         132       0.95      0.61      0.74        33
         133       0.95      0.61      0.74        33
         134       0.95      0.61      0.74        33
         135       0.95      0.61      0.74        33
         136       0.95      0.61      0.74        33
         137       0.95      0.61      0.74        33
         138       0.95      0.61      0.74        33
         139       0.95      0.61      0.74        33
         140       0.95      0.61      0.74        33
         141       0.95      0.61      0.74        33
         142       0.95      0.61      0.74        33
         143       0.95      0.61      0.74        33
         144       0.95      0.61      0.74        33
         145       0.95      0.61      0.74        33
         146       0.95      0.61      0.74        33
         147       0.95      0.61      0.74        33
         148       0.95      0.61      0.74        33
         149       0.95      0.61      0.74        33
         150       0.95      0.61      0.74        33
         151       0.95      0.61      0.74        33
         152       0.95      0.61      0.74        33
         153       0.95      0.61      0.74        33

   micro avg       0.95      0.61      0.74      5082
   macro avg       0.95      0.61      0.74      5082
weighted avg       0.95      0.61      0.74      5082
 samples avg       0.09      0.09      0.09      5082

C:\Users\thors\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\LocalCache\local-packages\Python310\site-packages\sklearn\metrics\_classification.py:1497: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in samples with no predicted labels. Use `zero_division` parameter to control this behavior.
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
C:\Users\thors\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\LocalCache\local-packages\Python310\site-packages\sklearn\metrics\_classification.py:1497: UndefinedMetricWarning: Recall is ill-defined and being set to 0.0 in samples with no true labels. Use `zero_division` parameter to control this behavior.
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
C:\Users\thors\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\LocalCache\local-packages\Python310\site-packages\sklearn\metrics\_classification.py:1497: UndefinedMetricWarning: F-score is ill-defined and being set to 0.0 in samples with no true nor predicted labels. Use `zero_division` parameter to control this behavior.
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))

pipeline = make_pipeline(StandardScaler(), RandomForestClassifier())

# Setup the parameters and distributions to sample from: param_dist
param_dist = {
    "randomforestclassifier__max_depth": [28, 30, 32, None],  
    "randomforestclassifier__min_samples_leaf": np.arange(1, 10, 4),
    "randomforestclassifier__n_estimators": np.arange(60, 100, 4)
}

# Instantiate the RandomizedSearchCV object: random_grid_search_cv
random_search_cv = RandomizedSearchCV(pipeline, param_distributions=param_dist, n_iter=50, cv=3, random_state=42)
#grid_search_cv = GridSearchCV(pipeline, param_grid=param_dist, cv=3)

# Fit random_search_cv using the data X and labels y
random_search_cv.fit(X_train, y_train)
#grid_search_cv.fit(X_train, y_train)

# Print the best score
print("Best score is {}".format(random_search_cv.best_estimator_.score(X_test, y_test)))
print("Best parameters are {}".format(random_search_cv.best_params_))
print(classification_report(y_test, y_pred))

Best score is 0.956140350877193
Best parameters are {'randomforestclassifier__max_depth': 28, 'randomforestclassifier__min_samples_leaf': 1, 'randomforestclassifier__n_estimators': 68}

from sklearn.neighbors import KNeighborsClassifier
model = KNeighborsClassifier()
model.fit(X_train, y_train)

y_pred = model.predict(X_test)

# Calculate accuracy
print("Score: ", model.score(X_test, y_test))
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)
print(classification_report(y_test, y_pred))

Score:  0.8333333333333334
Accuracy: 0.8333333333333334
              precision    recall  f1-score   support

           0       0.14      0.03      0.05        33
           1       0.14      0.03      0.05        33
           2       0.14      0.03      0.05        33
           3       0.14      0.03      0.05        33
           4       0.14      0.03      0.05        33
           5       0.14      0.03      0.05        33
           6       0.14      0.03      0.05        33
           7       0.14      0.03      0.05        33
           8       0.14      0.03      0.05        33
           9       0.14      0.03      0.05        33
          10       0.14      0.03      0.05        33
          11       0.14      0.03      0.05        33
          12       0.14      0.03      0.05        33
          13       0.14      0.03      0.05        33
          14       0.14      0.03      0.05        33
          15       0.14      0.03      0.05        33
          16       0.14      0.03      0.05        33
          17       0.14      0.03      0.05        33
          18       0.14      0.03      0.05        33
          19       0.14      0.03      0.05        33
          20       0.14      0.03      0.05        33
          21       0.14      0.03      0.05        33
          22       0.14      0.03      0.05        33
          23       0.14      0.03      0.05        33
          24       0.14      0.03      0.05        33
          25       0.14      0.03      0.05        33
          26       0.14      0.03      0.05        33
          27       0.14      0.03      0.05        33
          28       0.14      0.03      0.05        33
          29       0.14      0.03      0.05        33
          30       0.14      0.03      0.05        33
          31       0.14      0.03      0.05        33
          32       0.14      0.03      0.05        33
          33       0.14      0.03      0.05        33
          34       0.14      0.03      0.05        33
          35       0.14      0.03      0.05        33
          36       0.14      0.03      0.05        33
          37       0.14      0.03      0.05        33
          38       0.14      0.03      0.05        33
          39       0.14      0.03      0.05        33
          40       0.14      0.03      0.05        33
          41       0.14      0.03      0.05        33
          42       0.14      0.03      0.05        33
          43       0.14      0.03      0.05        33
          44       0.14      0.03      0.05        33
          45       0.14      0.03      0.05        33
          46       0.14      0.03      0.05        33
          47       0.14      0.03      0.05        33
          48       0.14      0.03      0.05        33
          49       0.14      0.03      0.05        33
          50       0.14      0.03      0.05        33
          51       0.14      0.03      0.05        33
          52       0.14      0.03      0.05        33
          53       0.14      0.03      0.05        33
          54       0.14      0.03      0.05        33
          55       0.14      0.03      0.05        33
          56       0.14      0.03      0.05        33
          57       0.14      0.03      0.05        33
          58       0.14      0.03      0.05        33
          59       0.14      0.03      0.05        33
          60       0.14      0.03      0.05        33
          61       0.14      0.03      0.05        33
          62       0.14      0.03      0.05        33
          63       0.14      0.03      0.05        33
          64       0.14      0.03      0.05        33
          65       0.14      0.03      0.05        33
          66       0.14      0.03      0.05        33
          67       0.14      0.03      0.05        33
          68       0.14      0.03      0.05        33
          69       0.14      0.03      0.05        33
          70       0.14      0.03      0.05        33
          71       0.14      0.03      0.05        33
          72       0.14      0.03      0.05        33
          73       0.14      0.03      0.05        33
          74       0.14      0.03      0.05        33
          75       0.14      0.03      0.05        33
          76       0.14      0.03      0.05        33
          77       0.14      0.03      0.05        33
          78       0.14      0.03      0.05        33
          79       0.14      0.03      0.05        33
          80       0.14      0.03      0.05        33
          81       0.14      0.03      0.05        33
          82       0.14      0.03      0.05        33
          83       0.14      0.03      0.05        33
          84       0.14      0.03      0.05        33
          85       0.14      0.03      0.05        33
          86       0.14      0.03      0.05        33
          87       0.14      0.03      0.05        33
          88       0.14      0.03      0.05        33
          89       0.14      0.03      0.05        33
          90       0.14      0.03      0.05        33
          91       0.14      0.03      0.05        33
          92       0.14      0.03      0.05        33
          93       0.14      0.03      0.05        33
          94       0.14      0.03      0.05        33
          95       0.14      0.03      0.05        33
          96       0.14      0.03      0.05        33
          97       0.14      0.03      0.05        33
          98       0.14      0.03      0.05        33
          99       0.14      0.03      0.05        33
         100       0.14      0.03      0.05        33
         101       0.14      0.03      0.05        33
         102       0.14      0.03      0.05        33
         103       0.14      0.03      0.05        33
         104       0.14      0.03      0.05        33
         105       0.14      0.03      0.05        33
         106       0.14      0.03      0.05        33
         107       0.14      0.03      0.05        33
         108       0.14      0.03      0.05        33
         109       0.14      0.03      0.05        33
         110       0.14      0.03      0.05        33
         111       0.14      0.03      0.05        33
         112       0.14      0.03      0.05        33
         113       0.14      0.03      0.05        33
         114       0.14      0.03      0.05        33
         115       0.14      0.03      0.05        33
         116       0.14      0.03      0.05        33
         117       0.14      0.03      0.05        33
         118       0.14      0.03      0.05        33
         119       0.14      0.03      0.05        33
         120       0.14      0.03      0.05        33
         121       0.14      0.03      0.05        33
         122       0.14      0.03      0.05        33
         123       0.14      0.03      0.05        33
         124       0.14      0.03      0.05        33
         125       0.14      0.03      0.05        33
         126       0.14      0.03      0.05        33
         127       0.14      0.03      0.05        33
         128       0.14      0.03      0.05        33
         129       0.14      0.03      0.05        33
         130       0.14      0.03      0.05        33
         131       0.14      0.03      0.05        33
         132       0.14      0.03      0.05        33
         133       0.14      0.03      0.05        33
         134       0.14      0.03      0.05        33
         135       0.14      0.03      0.05        33
         136       0.14      0.03      0.05        33
         137       0.14      0.03      0.05        33
         138       0.14      0.03      0.05        33
         139       0.14      0.03      0.05        33
         140       0.14      0.03      0.05        33
         141       0.14      0.03      0.05        33
         142       0.14      0.03      0.05        33
         143       0.14      0.03      0.05        33
         144       0.14      0.03      0.05        33
         145       0.14      0.03      0.05        33
         146       0.14      0.03      0.05        33
         147       0.14      0.03      0.05        33
         148       0.14      0.03      0.05        33
         149       0.14      0.03      0.05        33
         150       0.14      0.03      0.05        33
         151       0.14      0.03      0.05        33
         152       0.14      0.03      0.05        33
         153       0.14      0.03      0.05        33

   micro avg       0.14      0.03      0.05      5082
   macro avg       0.14      0.03      0.05      5082
weighted avg       0.14      0.03      0.05      5082
 samples avg       0.00      0.00      0.00      5082

C:\Users\thors\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\LocalCache\local-packages\Python310\site-packages\sklearn\metrics\_classification.py:1497: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in samples with no predicted labels. Use `zero_division` parameter to control this behavior.
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
C:\Users\thors\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\LocalCache\local-packages\Python310\site-packages\sklearn\metrics\_classification.py:1497: UndefinedMetricWarning: Recall is ill-defined and being set to 0.0 in samples with no true labels. Use `zero_division` parameter to control this behavior.
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
C:\Users\thors\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\LocalCache\local-packages\Python310\site-packages\sklearn\metrics\_classification.py:1497: UndefinedMetricWarning: F-score is ill-defined and being set to 0.0 in samples with no true nor predicted labels. Use `zero_division` parameter to control this behavior.
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))

from sklearn.neighbors import KNeighborsClassifier
param_grid = {
    'kneighborsclassifier__n_neighbors': [3, 5, 7, 9]  # List of k values to try
}

pipeline = make_pipeline(StandardScaler(), KNeighborsClassifier())

grid_search = GridSearchCV(estimator=pipeline, param_grid=param_grid, cv=5)

grid_search.fit(X_train, y_train)

best_params = grid_search.best_params_
best_score=grid_search.best_estimator_.score(X_test, y_test)

print("Best Parameters:", best_params)
print("Best Score:", best_score)
print(classification_report(y_test, y_pred))

Best Parameters: {'kneighborsclassifier__n_neighbors': 3}
Best Score: 0.9035087719298246
              precision    recall  f1-score   support

           0       0.14      0.03      0.05        33
           1       0.14      0.03      0.05        33
           2       0.14      0.03      0.05        33
           3       0.14      0.03      0.05        33
           4       0.14      0.03      0.05        33
           5       0.14      0.03      0.05        33
           6       0.14      0.03      0.05        33
           7       0.14      0.03      0.05        33
           8       0.14      0.03      0.05        33
           9       0.14      0.03      0.05        33
          10       0.14      0.03      0.05        33
          11       0.14      0.03      0.05        33
          12       0.14      0.03      0.05        33
          13       0.14      0.03      0.05        33
          14       0.14      0.03      0.05        33
          15       0.14      0.03      0.05        33
          16       0.14      0.03      0.05        33
          17       0.14      0.03      0.05        33
          18       0.14      0.03      0.05        33
          19       0.14      0.03      0.05        33
          20       0.14      0.03      0.05        33
          21       0.14      0.03      0.05        33
          22       0.14      0.03      0.05        33
          23       0.14      0.03      0.05        33
          24       0.14      0.03      0.05        33
          25       0.14      0.03      0.05        33
          26       0.14      0.03      0.05        33
          27       0.14      0.03      0.05        33
          28       0.14      0.03      0.05        33
          29       0.14      0.03      0.05        33
          30       0.14      0.03      0.05        33
          31       0.14      0.03      0.05        33
          32       0.14      0.03      0.05        33
          33       0.14      0.03      0.05        33
          34       0.14      0.03      0.05        33
          35       0.14      0.03      0.05        33
          36       0.14      0.03      0.05        33
          37       0.14      0.03      0.05        33
          38       0.14      0.03      0.05        33
          39       0.14      0.03      0.05        33
          40       0.14      0.03      0.05        33
          41       0.14      0.03      0.05        33
          42       0.14      0.03      0.05        33
          43       0.14      0.03      0.05        33
          44       0.14      0.03      0.05        33
          45       0.14      0.03      0.05        33
          46       0.14      0.03      0.05        33
          47       0.14      0.03      0.05        33
          48       0.14      0.03      0.05        33
          49       0.14      0.03      0.05        33
          50       0.14      0.03      0.05        33
          51       0.14      0.03      0.05        33
          52       0.14      0.03      0.05        33
          53       0.14      0.03      0.05        33
          54       0.14      0.03      0.05        33
          55       0.14      0.03      0.05        33
          56       0.14      0.03      0.05        33
          57       0.14      0.03      0.05        33
          58       0.14      0.03      0.05        33
          59       0.14      0.03      0.05        33
          60       0.14      0.03      0.05        33
          61       0.14      0.03      0.05        33
          62       0.14      0.03      0.05        33
          63       0.14      0.03      0.05        33
          64       0.14      0.03      0.05        33
          65       0.14      0.03      0.05        33
          66       0.14      0.03      0.05        33
          67       0.14      0.03      0.05        33
          68       0.14      0.03      0.05        33
          69       0.14      0.03      0.05        33
          70       0.14      0.03      0.05        33
          71       0.14      0.03      0.05        33
          72       0.14      0.03      0.05        33
          73       0.14      0.03      0.05        33
          74       0.14      0.03      0.05        33
          75       0.14      0.03      0.05        33
          76       0.14      0.03      0.05        33
          77       0.14      0.03      0.05        33
          78       0.14      0.03      0.05        33
          79       0.14      0.03      0.05        33
          80       0.14      0.03      0.05        33
          81       0.14      0.03      0.05        33
          82       0.14      0.03      0.05        33
          83       0.14      0.03      0.05        33
          84       0.14      0.03      0.05        33
          85       0.14      0.03      0.05        33
          86       0.14      0.03      0.05        33
          87       0.14      0.03      0.05        33
          88       0.14      0.03      0.05        33
          89       0.14      0.03      0.05        33
          90       0.14      0.03      0.05        33
          91       0.14      0.03      0.05        33
          92       0.14      0.03      0.05        33
          93       0.14      0.03      0.05        33
          94       0.14      0.03      0.05        33
          95       0.14      0.03      0.05        33
          96       0.14      0.03      0.05        33
          97       0.14      0.03      0.05        33
          98       0.14      0.03      0.05        33
          99       0.14      0.03      0.05        33
         100       0.14      0.03      0.05        33
         101       0.14      0.03      0.05        33
         102       0.14      0.03      0.05        33
         103       0.14      0.03      0.05        33
         104       0.14      0.03      0.05        33
         105       0.14      0.03      0.05        33
         106       0.14      0.03      0.05        33
         107       0.14      0.03      0.05        33
         108       0.14      0.03      0.05        33
         109       0.14      0.03      0.05        33
         110       0.14      0.03      0.05        33
         111       0.14      0.03      0.05        33
         112       0.14      0.03      0.05        33
         113       0.14      0.03      0.05        33
         114       0.14      0.03      0.05        33
         115       0.14      0.03      0.05        33
         116       0.14      0.03      0.05        33
         117       0.14      0.03      0.05        33
         118       0.14      0.03      0.05        33
         119       0.14      0.03      0.05        33
         120       0.14      0.03      0.05        33
         121       0.14      0.03      0.05        33
         122       0.14      0.03      0.05        33
         123       0.14      0.03      0.05        33
         124       0.14      0.03      0.05        33
         125       0.14      0.03      0.05        33
         126       0.14      0.03      0.05        33
         127       0.14      0.03      0.05        33
         128       0.14      0.03      0.05        33
         129       0.14      0.03      0.05        33
         130       0.14      0.03      0.05        33
         131       0.14      0.03      0.05        33
         132       0.14      0.03      0.05        33
         133       0.14      0.03      0.05        33
         134       0.14      0.03      0.05        33
         135       0.14      0.03      0.05        33
         136       0.14      0.03      0.05        33
         137       0.14      0.03      0.05        33
         138       0.14      0.03      0.05        33
         139       0.14      0.03      0.05        33
         140       0.14      0.03      0.05        33
         141       0.14      0.03      0.05        33
         142       0.14      0.03      0.05        33
         143       0.14      0.03      0.05        33
         144       0.14      0.03      0.05        33
         145       0.14      0.03      0.05        33
         146       0.14      0.03      0.05        33
         147       0.14      0.03      0.05        33
         148       0.14      0.03      0.05        33
         149       0.14      0.03      0.05        33
         150       0.14      0.03      0.05        33
         151       0.14      0.03      0.05        33
         152       0.14      0.03      0.05        33
         153       0.14      0.03      0.05        33

   micro avg       0.14      0.03      0.05      5082
   macro avg       0.14      0.03      0.05      5082
weighted avg       0.14      0.03      0.05      5082
 samples avg       0.00      0.00      0.00      5082

C:\Users\thors\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\LocalCache\local-packages\Python310\site-packages\sklearn\metrics\_classification.py:1497: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in samples with no predicted labels. Use `zero_division` parameter to control this behavior.
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
C:\Users\thors\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\LocalCache\local-packages\Python310\site-packages\sklearn\metrics\_classification.py:1497: UndefinedMetricWarning: Recall is ill-defined and being set to 0.0 in samples with no true labels. Use `zero_division` parameter to control this behavior.
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
C:\Users\thors\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\LocalCache\local-packages\Python310\site-packages\sklearn\metrics\_classification.py:1497: UndefinedMetricWarning: F-score is ill-defined and being set to 0.0 in samples with no true nor predicted labels. Use `zero_division` parameter to control this behavior.
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))

df = preprocessed_df.copy()

# Some type combinations only occur once so we extract them and add them to the test data
df['Types'] = df[['type_1', 'type_2']].apply(lambda x: tuple(filter(lambda y: pd.notna(y), x)), axis=1)

singleton_classes = df['Types'].value_counts()[df['Types'].value_counts() == 1].index.tolist()
singleton_data = df[df['Types'].isin(singleton_classes)]
other_data = df[~df['Types'].isin(singleton_classes)]
df = df.drop(columns=['Types'])
other_data.drop(columns=['Types'], inplace=True)
singleton_data.drop(columns=['Types'], inplace=True)
y = df[['type_1', 'type_2']]
df.head()

y.head()

from sklearn.multioutput import MultiOutputClassifier

# Train-Test Split
X_train, X_test, y_train, y_test = train_test_split(other_data.drop(columns=['type_1', 'type_2']), other_data[['type_1', 'type_2']], test_size=0.2, stratify=other_data[['type_1', 'type_2']], random_state=42)
X_train = pd.concat([X_train, singleton_data.drop(columns=['type_1', 'type_2'])])
y_train = pd.concat([y_train, singleton_data[['type_1', 'type_2']]])
X_test = pd.concat([X_test, singleton_data.drop(columns=['type_1', 'type_2'])])
y_test = pd.concat([y_test, singleton_data[['type_1', 'type_2']]])


base_classifier = DecisionTreeClassifier()

multi_output_classifier = MultiOutputClassifier(base_classifier)
multi_output_classifier.fit(X_train, y_train)
base_classifier.fit(X_train, y_train)

# Step 4: Model Evaluation
y_pred = multi_output_classifier.predict(X_test)

# Evaluate

print("Score: ", multi_output_classifier.score(X_test, y_test))

y_pred = base_classifier.predict(X_test)

# our own score function
a = (y_test == y_pred)
b = []

for i, j in enumerate(a.iterrows()):
    b.append(j[1]['type_1'] and j[1]['type_2'])

nb_correct = 0
for i in b:
    if i:
        nb_correct += 1

score_ratio = nb_correct/len(b)
print("score ratio: ",score_ratio)

# accuracy score for each type
accuracy_list=[]
y_test = np.asarray(y_test)
y_pred = np.asarray(y_pred)
for i in range(2):
    accuracy = accuracy_score(y_test[:, i], y_pred[:, i])
    accuracy_list.append(accuracy)
    print("Accuracy type ", i+1, ": ", accuracy )
print("Averaged Accuracy for types: ",np.mean(accuracy_list))

Score:  0.4049586776859504
score ratio:  0.45867768595041325
Accuracy type  1 :  0.6487603305785123
Accuracy type  2 :  0.6074380165289256
Averaged Accuracy for types:  0.6280991735537189

pipeline = make_pipeline(StandardScaler(), MultiOutputClassifier(DecisionTreeClassifier()))

param_dist = {
    "multioutputclassifier__estimator__max_depth": [5, 6, 7, 8, 9, 10, 15, 30, None], 
    "multioutputclassifier__estimator__min_samples_leaf": np.arange(1, 10)
}

# Instantiate the GridSearchCV object
grid_search_cv = GridSearchCV(pipeline, param_grid=param_dist, cv=5)

# Fit grid_search_cv using the data X and labels y.
grid_search_cv.fit(X_train, y_train) 
y_pred = grid_search_cv.predict(X_test)

# Print the best score
print("Tuned Model Parameters: {}".format(grid_search_cv.best_params_))
print("Best score is {}".format(grid_search_cv.best_estimator_.score(X_test, y_test)))

Tuned Model Parameters: {'multioutputclassifier__estimator__max_depth': 30, 'multioutputclassifier__estimator__min_samples_leaf': 1}
Best score is 0.4049586776859504

from sklearn.multioutput import MultiOutputClassifier
from sklearn.ensemble import RandomForestClassifier
base_classifier = RandomForestClassifier()

#base_classifier.fit(X_train, y_train)

multi_output_classifier = MultiOutputClassifier(base_classifier)
multi_output_classifier.fit(X_train, y_train)

# Step 4: Model Evaluation
accuracy_list=[]
y_pred = multi_output_classifier.predict(X_test)
print("score: ", multi_output_classifier.score(X_test, y_test))
y_test = np.asarray(y_test)
y_pred = np.asarray(y_pred)
for i in range(2):
    accuracy = accuracy_score(y_test[:, i], y_pred[:, i])
    print("Accuracy type ", i+1, ": ", accuracy )
    accuracy_list.append(accuracy)
print("Averaged Accuracy for types: ",np.mean(accuracy_list))

score:  0.5619834710743802
Accuracy type  1 :  0.7933884297520661
Accuracy type  2 :  0.6818181818181818
Averaged Accuracy for types:  0.7376033057851239

pipeline = make_pipeline(StandardScaler(), MultiOutputClassifier(RandomForestClassifier()))

# Setup the parameters and distributions to sample from: param_dist
param_dist = {
    "multioutputclassifier__estimator__max_depth": [5, 10, 15, 30, None],  
    "multioutputclassifier__estimator__min_samples_leaf": np.arange(1, 10, 2),
    "multioutputclassifier__estimator__n_estimators": np.arange(60, 140, 8)
}

# Instantiate the RandomizedSearchCV object: random_grid_search_cv
random_search_cv = RandomizedSearchCV(pipeline, param_distributions=param_dist, n_iter=50, cv=3, random_state=42)
#grid_search_cv = GridSearchCV(pipeline, param_grid=param_dist, cv=3)

# Fit random_search_cv using the data X and labels y
random_search_cv.fit(X_train, y_train)
#grid_search_cv.fit(X_train, y_train)

# Print the best score
print("Best score is {}".format(random_search_cv.best_estimator_.score(X_test, y_test)))
print("Best parameters are {}".format(random_search_cv.best_params_))

Best score is 0.5578512396694215
Best parameters are {'multioutputclassifier__estimator__max_depth': None, 'multioutputclassifier__estimator__min_samples_leaf': 1, 'multioutputclassifier__estimator__n_estimators': 116}

from sklearn.multioutput import MultiOutputClassifier
from sklearn.neighbors import KNeighborsClassifier
base_classifier = KNeighborsClassifier()


multi_output_classifier = MultiOutputClassifier(base_classifier)
multi_output_classifier.fit(X_train, y_train)

# Step 4: Model Evaluation
y_pred = multi_output_classifier.predict(X_test)
print("score: ", multi_output_classifier.score(X_test, y_test))
y_test = np.asarray(y_test)
y_pred = np.asarray(y_pred)

accuracy_list=[]
for i in range(2):
    accuracy = accuracy_score(y_test[:, i], y_pred[:, i])
    print("Accuracy type ", i+1, ": ", accuracy )
    accuracy_list.append(accuracy)
print("Averaged Accuracy for types: ",np.mean(accuracy_list))

score:  0.08264462809917356
Accuracy type  1 :  0.24793388429752067
Accuracy type  2 :  0.35537190082644626
Averaged Accuracy for types:  0.30165289256198347

param_grid = {
    'multioutputclassifier__estimator__n_neighbors': [3, 5, 7, 9]  # List of k values to try
}

pipeline = make_pipeline(StandardScaler(), MultiOutputClassifier(KNeighborsClassifier()))

grid_search = GridSearchCV(estimator=pipeline, param_grid=param_grid, cv=5)

grid_search.fit(X_train, y_train)

best_params = grid_search.best_params_
best_score=grid_search.best_estimator_.score(X_test, y_test)

print("Best Parameters:", best_params)
print("Best Score:", best_score)

Best Parameters: {'multioutputclassifier__estimator__n_neighbors': 3}
Best Score: 0.2892561983471074

	Unnamed: 0	pokedex_number	generation	type_number	height_m	weight_kg	abilities_number	total_points	hp	attack	...	against_ground	against_flying	against_psychic	against_bug	against_rock	against_ghost	against_dragon	against_dark	against_steel	against_fairy
count	1045.000000	1045.000000	1045.000000	1045.000000	1045.000000	1044.000000	1045.000000	1045.000000	1045.000000	1045.000000	...	1045.000000	1045.000000	1045.000000	1045.000000	1045.000000	1045.000000	1045.000000	1045.000000	1045.000000	1045.000000
mean	522.000000	440.769378	4.098565	1.529187	1.374067	71.216571	2.268900	439.353110	70.067943	80.476555	...	1.082297	1.168900	0.977273	0.998086	1.238278	1.018660	0.977033	1.071053	0.981579	1.091148
std	301.809819	262.517231	2.272788	0.499386	3.353349	132.259911	0.803154	121.992897	26.671411	32.432728	...	0.782683	0.592145	0.501934	0.610411	0.696560	0.568056	0.375812	0.465178	0.501753	0.536285
min	0.000000	1.000000	1.000000	1.000000	0.100000	0.100000	0.000000	175.000000	1.000000	5.000000	...	0.000000	0.250000	0.000000	0.000000	0.250000	0.000000	0.000000	0.250000	0.000000	0.000000
25%	261.000000	212.000000	2.000000	1.000000	0.600000	9.000000	2.000000	330.000000	50.000000	55.000000	...	0.500000	1.000000	1.000000	0.500000	1.000000	1.000000	1.000000	1.000000	0.500000	1.000000
50%	522.000000	436.000000	4.000000	2.000000	1.000000	29.500000	2.000000	458.000000	68.000000	77.000000	...	1.000000	1.000000	1.000000	1.000000	1.000000	1.000000	1.000000	1.000000	1.000000	1.000000
75%	783.000000	670.000000	6.000000	2.000000	1.600000	70.500000	3.000000	515.000000	82.000000	100.000000	...	1.500000	1.000000	1.000000	1.000000	2.000000	1.000000	1.000000	1.000000	1.000000	1.000000
max	1044.000000	898.000000	8.000000	2.000000	100.000000	999.900000	3.000000	1125.000000	255.000000	190.000000	...	4.000000	4.000000	4.000000	4.000000	4.000000	4.000000	2.000000	4.000000	4.000000	4.000000

	generation	height_m	weight_kg	abilities_number	total_points	hp	attack	defense	sp_attack	sp_defense	...	egg_type_2_Field	egg_type_2_Flying	egg_type_2_Grass	egg_type_2_Human-Like	egg_type_2_Mineral	egg_type_2_Monster	egg_type_2_None	egg_type_2_Water 1	egg_type_2_Water 2	egg_type_2_Water 3
0	1	0.7	6.9	2	318	45	49	49	65	65	...	False	False	False	False	False	True	False	False	False	False
1	1	1.0	13.0	2	405	60	62	63	80	80	...	False	False	False	False	False	True	False	False	False	False
2	1	2.0	100.0	2	525	80	82	83	100	100	...	False	False	False	False	False	True	False	False	False	False
3	1	2.4	155.5	1	625	80	100	123	122	120	...	False	False	False	False	False	True	False	False	False	False
4	1	0.6	8.5	2	309	39	52	43	60	50	...	False	False	False	False	False	True	False	False	False	False

	generation	height_m	weight_kg	abilities_number	total_points	hp	attack	defense	sp_attack	sp_defense	...	egg_type_2_Flying	egg_type_2_Grass	egg_type_2_Human-Like	egg_type_2_Mineral	egg_type_2_Monster	egg_type_2_None	egg_type_2_Water 1	egg_type_2_Water 2	egg_type_2_Water 3	Types
0	1	0.7	6.9	2	318	45	49	49	65	65	...	False	False	False	False	True	False	False	False	False	('Grass', 'Poison')
1	1	1.0	13.0	2	405	60	62	63	80	80	...	False	False	False	False	True	False	False	False	False	('Grass', 'Poison')
2	1	2.0	100.0	2	525	80	82	83	100	100	...	False	False	False	False	True	False	False	False	False	('Grass', 'Poison')
3	1	2.4	155.5	1	625	80	100	123	122	120	...	False	False	False	False	True	False	False	False	False	('Grass', 'Poison')
4	1	0.6	8.5	2	309	39	52	43	60	50	...	False	False	False	False	True	False	False	False	False	('Fire', 'None')

	generation	height_m	weight_kg	abilities_number	total_points	hp	attack	defense	sp_attack	sp_defense	...	egg_type_2_Flying	egg_type_2_Grass	egg_type_2_Human-Like	egg_type_2_Mineral	egg_type_2_Monster	egg_type_2_None	egg_type_2_Water 1	egg_type_2_Water 2	egg_type_2_Water 3	Types
0	1	0.7	6.9	2	318	45	49	49	65	65	...	False	False	False	False	True	False	False	False	False	('Grass', 'Poison')
1	1	1.0	13.0	2	405	60	62	63	80	80	...	False	False	False	False	True	False	False	False	False	('Grass', 'Poison')
2	1	2.0	100.0	2	525	80	82	83	100	100	...	False	False	False	False	True	False	False	False	False	('Grass', 'Poison')
3	1	2.4	155.5	1	625	80	100	123	122	120	...	False	False	False	False	True	False	False	False	False	('Grass', 'Poison')
4	1	0.6	8.5	2	309	39	52	43	60	50	...	False	False	False	False	True	False	False	False	False	('Fire', 'None')

	generation	height_m	weight_kg	abilities_number	total_points	hp	attack	defense	sp_attack	sp_defense	...	egg_type_2_Flying	egg_type_2_Grass	egg_type_2_Human-Like	egg_type_2_Mineral	egg_type_2_Monster	egg_type_2_None	egg_type_2_Water 1	egg_type_2_Water 2	egg_type_2_Water 3	Types
0	1	0.7	6.9	2	318	45	49	49	65	65	...	False	False	False	False	True	False	False	False	False	['Grass', 'Poison']
1	1	1.0	13.0	2	405	60	62	63	80	80	...	False	False	False	False	True	False	False	False	False	['Grass', 'Poison']
2	1	2.0	100.0	2	525	80	82	83	100	100	...	False	False	False	False	True	False	False	False	False	['Grass', 'Poison']
3	1	2.4	155.5	1	625	80	100	123	122	120	...	False	False	False	False	True	False	False	False	False	['Grass', 'Poison']
4	1	0.6	8.5	2	309	39	52	43	60	50	...	False	False	False	False	True	False	False	False	False	['Fire', 'None']

	Unnamed: 0	pokedex_number	name	german_name	japanese_name	generation	status	species	type_number	type_1	...	against_ground	against_flying	against_psychic	against_bug	against_rock	against_ghost	against_dragon	against_dark	against_steel	against_fairy
0	0	1	Bulbasaur	Bisasam	フシギダネ (Fushigidane)	1	Normal	Seed Pokémon	2	Grass	...	1.0	2.0	2.0	1.0	1.0	1.0	1.0	1.0	1.0	0.5
1	1	2	Ivysaur	Bisaknosp	フシギソウ (Fushigisou)	1	Normal	Seed Pokémon	2	Grass	...	1.0	2.0	2.0	1.0	1.0	1.0	1.0	1.0	1.0	0.5
2	2	3	Venusaur	Bisaflor	フシギバナ (Fushigibana)	1	Normal	Seed Pokémon	2	Grass	...	1.0	2.0	2.0	1.0	1.0	1.0	1.0	1.0	1.0	0.5
3	3	3	Mega Venusaur	Bisaflor	フシギバナ (Fushigibana)	1	Normal	Seed Pokémon	2	Grass	...	1.0	2.0	2.0	1.0	1.0	1.0	1.0	1.0	1.0	0.5
4	4	4	Charmander	Glumanda	ヒトカゲ (Hitokage)	1	Normal	Lizard Pokémon	1	Fire	...	2.0	1.0	1.0	0.5	2.0	1.0	1.0	1.0	0.5	0.5

	generation	status	type_number	type_1	type_2	height_m	weight_kg	abilities_number	ability_1	ability_2	...	against_ground	against_flying	against_psychic	against_bug	against_rock	against_ghost	against_dragon	against_dark	against_steel	against_fairy
0	1	Normal	2	Grass	Poison	0.7	6.9	2	Overgrow	NaN	...	1.0	2.0	2.0	1.0	1.0	1.0	1.0	1.0	1.0	0.5
1	1	Normal	2	Grass	Poison	1.0	13.0	2	Overgrow	NaN	...	1.0	2.0	2.0	1.0	1.0	1.0	1.0	1.0	1.0	0.5
2	1	Normal	2	Grass	Poison	2.0	100.0	2	Overgrow	NaN	...	1.0	2.0	2.0	1.0	1.0	1.0	1.0	1.0	1.0	0.5
3	1	Normal	2	Grass	Poison	2.4	155.5	1	Thick Fat	NaN	...	1.0	2.0	2.0	1.0	1.0	1.0	1.0	1.0	1.0	0.5
4	1	Normal	1	Fire	NaN	0.6	8.5	2	Blaze	NaN	...	2.0	1.0	1.0	0.5	2.0	1.0	1.0	1.0	0.5	0.5

Exploring the dataset¶

Common preprocessing steps¶

Multi-class classification¶

Accounting for Order of Types¶

Decision tree¶

Hyperparameter Tuning¶

Random forest¶

Hyperparameter Tuning¶

Support Vector Machine¶

Radial base function¶

Hyperparameter Tuning¶

Linear¶

Hyperparameter tuning¶

Polynomial¶

Hyperparameter Tuning¶

Sigmoid¶

Hyperparameter Tuning¶

k Nearest neighbors¶

Hyperparameter Tuning¶

Logistic regression¶

Ignoring Order of Types¶

Preprocessing¶

Decision tree¶

Hyperparameter tuning¶

Random forest¶

Hyperparameter Tuning¶

Support vector machine¶

Radial base function¶

Hyperparameter Tuning¶

Linear¶

Hyperparameter tuning¶

Polynomial¶

Hyperparameter Tuning¶

Sigmoid¶

Hyperparameter Tuning¶

k Nearest Neighbors¶

Hyperparameter Tuning¶

Logistic regression¶

Multi-label Classification¶

Accounting for Order of Types¶

Preprocessing¶

Decision Tree¶

Hyperparameter tuning¶

Random Forest¶

Hyperparameter Tuning¶

K Nearest neighbors¶

Hyperparameter Tuning¶

Ignoring Order of Types 1¶

Preprocessing¶

Decision tree¶

Hyperparameter tuning¶

Random Forest¶

Hyperparameter tuning¶

K Nearest neigbors¶

Hyperparameter tuning¶

Ignoring Order of Types 2¶

Preprocessing¶

Decision Tree¶

Hyperparameter Tuning¶

Random forests¶

Hyperparameter Tuning¶

K Nearest Neighbors¶

Hyperparameter Tuning¶

Multiclass multioutput Classification¶

Preprocessing¶

Decision Tree¶

Hyperparameter Tuning¶

Random Forest¶

Hyperparameter Tuning¶

KNeighborsClassifier¶

Hyperparameter Tuning¶