import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
data = pd.read_csv("responses.csv")
data.head()
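# Quick, optional look at the dataset size: number of respondents and survey questions
print(data.shape)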
# The first 19 columns hold the music-genre preference ratings (1-5);
# copy the slice so the demographic columns can be added without a SettingWithCopyWarning
df = data.iloc[:, 0:19].copy()
df["Age"] = data["Age"]
df["Height"] = data["Height"]
df["Weight"] = data["Weight"]
df["Siblings"] = data["Number of siblings"]
df["Gender"] = data["Gender"]
df["Education"] = data["Education"]
df["Location"] = data["Village - town"]
print(df.info())
df.dropna(inplace = True)
df.reset_index(drop=True,inplace=True)
print(df.info())
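# Optional check of how many incomplete responses dropna removed ('data' still holds the raw file)
print(len(data) - len(df), 'rows dropped for missing values')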
# The ratings are read in as floats; cast every float column to int
for each in range(0, len(df.columns)):
    if df[df.columns[each]].dtype == np.float64:
        df[df.columns[each]] = df[df.columns[each]].astype(int)
print(df.info())
print(df['Music'].value_counts(dropna =False))
# Keep only respondents who rate their interest in music 4 or 5
filtre = df.Music < 4
df.drop(df[filtre].index, inplace=True)
df.reset_index(drop=True, inplace=True)
row = len(df.index)
print(df['Music'].value_counts(dropna =False))
df.drop(['Music'], axis=1,inplace = True)
df.head()
dummies1 = pd.get_dummies(df.Gender, dtype=int)
# dummies2 = pd.get_dummies(df.Education, dtype=int)
dummies3 = pd.get_dummies(df.Location, dtype=int)
df = pd.concat([df, dummies1, dummies3], axis='columns')
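# Optional look at the new indicator columns; their names come from the original
# category labels ('female'/'male' for Gender, 'city'/'village' for Village - town)
print(df[['female', 'male', 'city', 'village']].head())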
# let education be represented as an ordinal integer 0,1,2,...
education_map = {
    'currently a primary school pupil': 0,
    'primary school': 1,
    'secondary school': 2,
    'college/bachelor degree': 3,
    'masters degree': 4,
    'doctorate degree': 5,
}
# any category not listed above falls back to 6
df['Education'] = df['Education'].map(education_map).fillna(6).astype(int)
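# Optional check that every respondent received a valid ordinal education code
print(df['Education'].value_counts().sort_index())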
df.drop(['Gender','Location'], axis='columns',inplace = True)
# let gender be represented in one 0/1 column (1 = female)
df['Gender'] = df['female']
# let location be represented in one 0/1 column (1 = city)
df['Location'] = df['city']
# avoid dummy variable trap
df.drop(['female', 'male', 'city', 'village'], axis='columns',inplace = True)
# df.drop(['currently a primary school pupil', 'masters degree', 'doctorate degree'], axis='columns',inplace = True)
df.head()
df.dtypes
# Correlation heatmap of all (now fully numeric) columns
f, ax = plt.subplots(figsize=(25, 20))
sns.heatmap(df.corr(), annot=True, linewidths=1, fmt='.1f', ax=ax)
plt.show()
# Scale the 1-5 genre ratings down to the 0-1 range
df[df.columns[:18]] = df[df.columns[:18]] / 5
df.head()
# The 18 genre ratings are the targets; the demographic columns are the predictors
TargetVariable=df.columns[:18]
Predictors=df.columns[18:]
X=df[Predictors].values
y=df[TargetVariable].values
### Standardization of data ###
from sklearn.preprocessing import StandardScaler
PredictorScaler=StandardScaler()
TargetVarScaler=StandardScaler()
# Storing the fit object for later reference
PredictorScalerFit=PredictorScaler.fit(X)
TargetVarScalerFit=TargetVarScaler.fit(y)
# Generating the standardized values of X and y
X=PredictorScalerFit.transform(X)
y=TargetVarScalerFit.transform(y)
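# Optional sanity check: the standardized predictors should now have ~0 mean and ~1 std per column
print(np.round(X.mean(axis=0), 3))
print(np.round(X.std(axis=0), 3))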
# Split the data into training and testing set
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=69420)
# Quick sanity check with the shapes of Training and testing datasets
print(X_train.shape)
print(y_train.shape)
print(X_test.shape)
print(y_test.shape)
# The hyperparameter grid search below is expensive; set run = True to repeat it
run = False
if run:
    # Function to generate the deep ANN model
    def make_regression_ann(Optimizer_trial):
        from keras.models import Sequential
        from keras.layers import Dense
        model = Sequential()
        model.add(Dense(units=64, input_dim=7, kernel_initializer='normal', activation='relu'))
        model.add(Dense(units=64, kernel_initializer='normal', activation='tanh'))
        model.add(Dense(units=64, kernel_initializer='normal', activation='relu'))
        model.add(Dense(18, kernel_initializer='normal'))
        model.compile(loss='mean_squared_error', optimizer=Optimizer_trial)
        return model
    ###########################################
    from sklearn.model_selection import GridSearchCV
    # Note: keras.wrappers.scikit_learn was removed from recent Keras releases;
    # scikeras.wrappers.KerasRegressor is the maintained drop-in replacement
    from keras.wrappers.scikit_learn import KerasRegressor
    # Listing all the parameter combinations to try
    Parameter_Trials={'batch_size':[16,32,64,128,256,512],
                      'epochs':[5,10,20,50,100],
                      'Optimizer_trial':['adam', 'rmsprop']
                      }
    # Creating the regression ANN model
    RegModel=KerasRegressor(make_regression_ann, verbose=0)
    ###########################################
    from sklearn.metrics import make_scorer
    # Defining a custom scoring function: 100 minus the mean absolute percentage error
    def Accuracy_Score(orig, pred):
        MAPE = np.mean(100 * (np.abs(orig-pred)/orig))
        print('#'*70, 'Accuracy:', 100-MAPE)
        return 100-MAPE
    custom_Scoring=make_scorer(Accuracy_Score, greater_is_better=True)
    #########################################
    # Creating the grid search space
    # See the built-in scoring methods with sklearn.metrics.SCORERS.keys()
    grid_search=GridSearchCV(estimator=RegModel,
                             param_grid=Parameter_Trials,
                             scoring=custom_Scoring,
                             cv=5)
    #########################################
    # Measuring how much time it took to find the best parameters
    import time
    StartTime=time.time()
    # Running the grid search over the different parameters
    grid_search.fit(X, y, verbose=1)
    EndTime=time.time()
    print("########## Total Time Taken: ", round((EndTime-StartTime)/60), 'Minutes')
    print('### Printing best parameters ###')
    print(grid_search.best_params_)
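    # Illustrative follow-up (only meaningful when the search above has been run):
    # the best configuration could be refit on the training split; 'best' and
    # 'BestModel' are new names introduced here purely for this sketch
    best = grid_search.best_params_
    BestModel = KerasRegressor(make_regression_ann,
                               Optimizer_trial=best['Optimizer_trial'],
                               batch_size=best['batch_size'],
                               epochs=best['epochs'],
                               verbose=0)
    BestModel.fit(X_train, y_train)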
from keras.models import Sequential
from keras.layers import Dense, Dropout
# create ANN model
model = Sequential()
# Dropout on the inputs for regularization
model.add(Dropout(0.2, input_shape=(7,)))
# The first Dense layer doubles as the input layer and the first hidden layer
model.add(Dense(units=64, input_dim=7, kernel_initializer='normal', activation='relu'))
# Second hidden layer
# After the first layer there is no need to specify input_dim; Keras infers it automatically
model.add(Dense(units=64, kernel_initializer='normal', activation='tanh'))
# Third hidden layer
model.add(Dense(units=64, kernel_initializer='normal', activation='relu'))
# Output layer: 18 fully connected nodes, one per genre rating
model.add(Dense(18, kernel_initializer='normal'))
# Compiling the model
model.compile(loss='mean_squared_error', optimizer='adam')
# Fitting the ANN to the Training set
model.fit(X_train, y_train ,batch_size = 512, epochs = 5, verbose=1)
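# Optional quick check of the mean squared error on the held-out (still standardized) test split
print('Test MSE:', model.evaluate(X_test, y_test, verbose=0))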
# Generating predictions on the test data
Predictions=model.predict(X_test)
# Scaling the predicted genre ratings back to their original scale
Predictions=TargetVarScalerFit.inverse_transform(Predictions)
# Scaling y_test back to the original scale as well
y_test_orig=TargetVarScalerFit.inverse_transform(y_test)
# Scaling the test predictors back to their original scale
Test_Data=PredictorScalerFit.inverse_transform(X_test)
TestingData=pd.DataFrame(data=Test_Data, columns=Predictors)
TestingData[df.columns[:18]]=y_test_orig
PredNames = ['Predicted ' + target for target in df.columns[:18]]
TestingData[PredNames]=Predictions
TestingData.head()
# Absolute percentage error per genre, kept both as columns and for an overall average
APEs = []
for col in df.columns[:18]:
    APE = 100*(abs(TestingData[col]-TestingData['Predicted ' + col])/TestingData[col])
    APEs.append(APE)
    TestingData[col + ' APE'] = APE
print('The Accuracy of ANN model is:', 100-np.mean(APEs))
TestingData.head()
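# Optional breakdown: per-genre accuracy computed from the APE columns above
for col in df.columns[:18]:
    print(col, ':', round(100 - TestingData[col + ' APE'].mean(), 2))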
model.save("genrepredmodel")
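# The saved model can be reloaded later for inference; note that new inputs would need
# the same preprocessing and the PredictorScalerFit transform applied first
from keras.models import load_model
reloaded_model = load_model("genrepredmodel")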