In [48]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler, normalize
# Read the student sleep survey from the CSV in the working directory.
df = pd.read_csv("student_sleep_patterns.csv")
# A bare last expression renders the frame as this cell's output.
df
Out[48]:
Student_ID Age Gender University_Year Sleep_Duration Study_Hours Screen_Time Caffeine_Intake Physical_Activity Sleep_Quality Weekday_Sleep_Start Weekend_Sleep_Start Weekday_Sleep_End Weekend_Sleep_End
0 1 24 Other 2nd Year 7.7 7.9 3.4 2 37 10 14.16 4.05 7.41 7.06
1 2 21 Male 1st Year 6.3 6.0 1.9 5 74 2 8.73 7.10 8.21 10.21
2 3 22 Male 4th Year 5.1 6.7 3.9 5 53 5 20.00 20.47 6.88 10.92
3 4 24 Other 4th Year 6.3 8.6 2.8 4 55 9 19.82 4.08 6.69 9.42
4 5 20 Male 4th Year 4.7 2.7 2.7 0 85 3 20.98 6.12 8.98 9.01
... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
495 496 24 Male 2nd Year 5.1 9.3 1.9 4 110 4 17.42 8.43 6.93 10.78
496 497 20 Male 2nd Year 8.9 7.7 3.5 3 40 4 1.22 15.54 5.85 7.23
497 498 21 Male 3rd Year 5.7 6.4 3.9 1 68 10 9.94 2.25 5.46 10.72
498 499 18 Female 2nd Year 4.9 0.5 3.5 0 12 2 19.10 15.49 8.35 7.20
499 500 21 Male 3rd Year 7.9 11.6 1.0 0 86 1 7.54 14.12 7.01 9.19

500 rows × 14 columns

In [49]:
# Print the per-column dtype and non-null count summary of the loaded frame.
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 500 entries, 0 to 499
Data columns (total 14 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   Student_ID           500 non-null    int64  
 1   Age                  500 non-null    int64  
 2   Gender               500 non-null    object 
 3   University_Year      500 non-null    object 
 4   Sleep_Duration       500 non-null    float64
 5   Study_Hours          500 non-null    float64
 6   Screen_Time          500 non-null    float64
 7   Caffeine_Intake      500 non-null    int64  
 8   Physical_Activity    500 non-null    int64  
 9   Sleep_Quality        500 non-null    int64  
 10  Weekday_Sleep_Start  500 non-null    float64
 11  Weekend_Sleep_Start  500 non-null    float64
 12  Weekday_Sleep_End    500 non-null    float64
 13  Weekend_Sleep_End    500 non-null    float64
dtypes: float64(7), int64(5), object(2)
memory usage: 54.8+ KB
In [50]:
# (rows, columns) of the loaded frame, shown as the cell output.
df.shape
Out[50]:
(500, 14)
In [68]:
# Features whose relationship to sleep quality is drawn, one figure per feature.
columns = ['University_Year', 'Screen_Time', 'Caffeine_Intake']

for col in columns:
    # Average Sleep_Quality within each distinct value of the current feature;
    # as_index=False keeps the feature as a regular column for plotting.
    mean_quality = df.groupby(col, as_index=False)['Sleep_Quality'].mean()
    sns.lineplot(data=mean_quality, x=col, y='Sleep_Quality')
    plt.title('Sleep Quality by ' + col)
    plt.show()
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
In [52]:
from sklearn.preprocessing import StandardScaler, normalize
from sklearn.decomposition import PCA

# Build the feature matrix explicitly. `X` was previously read here without
# being defined anywhere in the notebook, so the cell raised NameError on a
# fresh kernel (Restart & Run All).
# NOTE(review): assumes every numeric column except the Student_ID row
# identifier is a feature -- narrow this selection if a subset was intended.
X = df.select_dtypes(include='number').drop(columns=['Student_ID'])

# Standardize each column, then rescale every row to unit L2 norm
# (the default norm of sklearn's `normalize`).
scaler = StandardScaler()
scaled_X = scaler.fit_transform(X)
normalized_X = pd.DataFrame(normalize(scaled_X), columns=X.columns, index=X.index)

# Project onto the first two principal components. Each component is a linear
# combination of ALL input features, so the axes are named PC1/PC2 rather than
# after individual original columns.
pca = PCA(n_components=2)
X_pca = pd.DataFrame(
    pca.fit_transform(normalized_X),
    columns=['PC1', 'PC2'],
    index=X.index,
)

# Share of total variance captured by each retained component, for the labels.
pc1_var, pc2_var = pca.explained_variance_ratio_

plt.figure(figsize=(10, 6), dpi=200)
plt.scatter(X_pca['PC1'], X_pca['PC2'])
plt.xlabel(f'PC1 ({pc1_var:.1%} of variance)')
plt.ylabel(f'PC2 ({pc2_var:.1%} of variance)')
plt.title('PCA projection of student sleep features')
plt.grid(True, alpha=0.3)
plt.show()
No description has been provided for this image
In [67]:
# Raw scatter of reported sleep quality (x) against screen time (y),
# drawn through the explicit Axes interface.
fig, ax = plt.subplots()
ax.scatter(df['Sleep_Quality'], df['Screen_Time'])
ax.set_xlabel('Sleep_Quality')
ax.set_ylabel('Screen_Time')
ax.set_title('Sleep_Quality vs Screen_Time')
ax.grid(True, alpha=0.3)
plt.show()
No description has been provided for this image
In [69]:
# Raw scatter of reported sleep quality (x) against physical activity (y).
# The columns are plotted directly from `df`; no PCA is involved, so the
# title no longer carries the "PCA:" prefix.
plt.scatter(df['Sleep_Quality'], df['Physical_Activity'])
plt.xlabel('Sleep_Quality')
plt.ylabel('Physical_Activity')
plt.title('Sleep_Quality vs Physical_Activity')
plt.grid(True, alpha=0.3)
plt.show()
No description has been provided for this image
In [ ]: