import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler, normalize
# Load the dataset from a CSV file given by a path relative to the working directory.
df = pd.read_csv('student_sleep_patterns.csv')
# Bare expression: a notebook renders the frame only when this is the last
# statement of its cell; run as a plain script it has no visible effect.
df

# Print a summary of the frame: index range, column names, non-null counts,
# dtypes and memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 500 entries, 0 to 499
Data columns (total 14 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   Student_ID           500 non-null    int64  
 1   Age                  500 non-null    int64  
 2   Gender               500 non-null    object 
 3   University_Year      500 non-null    object 
 4   Sleep_Duration       500 non-null    float64
 5   Study_Hours          500 non-null    float64
 6   Screen_Time          500 non-null    float64
 7   Caffeine_Intake      500 non-null    int64  
 8   Physical_Activity    500 non-null    int64  
 9   Sleep_Quality        500 non-null    int64  
 10  Weekday_Sleep_Start  500 non-null    float64
 11  Weekend_Sleep_Start  500 non-null    float64
 12  Weekday_Sleep_End    500 non-null    float64
 13  Weekend_Sleep_End    500 non-null    float64
dtypes: float64(7), int64(5), object(2)
memory usage: 54.8+ KB

# (n_rows, n_columns) of the loaded frame; only displayed when evaluated as
# the last expression of a notebook cell.
df.shape

(500, 14)

# Features to compare against sleep quality, one chart each.
columns = ['University_Year', 'Screen_Time', 'Caffeine_Intake']

for col in columns:
    # Average Sleep_Quality for every distinct value of the current feature,
    # flattened back into a two-column frame for plotting.
    quality_by_group = df.groupby([col])['Sleep_Quality']
    x = quality_by_group.mean().reset_index()

    # lineplot draws on the current axes and returns them, so the title can
    # be set on the returned object.
    axes = sns.lineplot(data=x, x=col, y='Sleep_Quality')
    axes.set_title(f'Sleep Quality by {col}')
    plt.show()

from sklearn.preprocessing import StandardScaler, normalize
from sklearn.decomposition import PCA

# Project the numeric measurements onto their first two principal components
# and show the result as a scatter plot.

# `X` was never defined before being scaled, which raised a NameError. Use
# every numeric column as a PCA input; Student_ID is a row identifier rather
# than a measurement, so it is excluded.
# NOTE(review): confirm this is the intended feature set.
X = df.select_dtypes(include='number').drop(columns=['Student_ID'], errors='ignore')

# Standardise each column to zero mean / unit variance so that no feature
# dominates purely because of its units.
scaler = StandardScaler()
scaled_X = scaler.fit_transform(X)

# Rescale every row (sample) to unit L2 norm, which is `normalize`'s default.
normalized_X = normalize(scaled_X)
normalized_X = pd.DataFrame(normalized_X)

# Reduce to two dimensions for plotting.
pca = PCA(n_components=2)
X_pca = pca.fit_transform(normalized_X)
X_pca = pd.DataFrame(X_pca)
# Principal components are linear combinations of all inputs, not individual
# original features, so they get neutral names instead of feature names.
X_pca.columns = ['PC1', 'PC2']

# Share of total variance captured by each component, shown on the axes so
# the reader can judge how much structure the 2-D view retains.
pc1_share, pc2_share = pca.explained_variance_ratio_

plt.figure(figsize=(10, 6), dpi=200)
plt.scatter(X_pca['PC1'], X_pca['PC2'])
plt.xlabel(f'PC1 ({pc1_share:.1%} of variance)')
plt.ylabel(f'PC2 ({pc2_share:.1%} of variance)')
plt.title('PCA: First Two Principal Components')
plt.grid(True, alpha=0.3)
plt.show()

# Raw scatter of screen time (y) against sleep quality (x), drawn on the
# current axes.
axes = plt.gca()
axes.scatter(df['Sleep_Quality'], df['Screen_Time'])
axes.set_xlabel('Sleep_Quality')
axes.set_ylabel('Screen_Time')
axes.set_title('Sleep_Quality vs Screen_Time')
# Faint grid to make values easier to read off.
axes.grid(True, alpha=0.3)
plt.show()

# Raw scatter of physical activity (y) against sleep quality (x).
plt.scatter(df['Sleep_Quality'], df['Physical_Activity'])
plt.xlabel('Sleep_Quality')
plt.ylabel('Physical_Activity')
# The plot uses the original columns directly, not PCA output, so the title
# no longer carries the "PCA:" prefix.
plt.title('Sleep_Quality vs Physical_Activity')
# Faint grid to make values easier to read off.
plt.grid(True, alpha=0.3)
plt.show()

	Student_ID	Age	Gender	University_Year	Sleep_Duration	Study_Hours	Screen_Time	Caffeine_Intake	Physical_Activity	Sleep_Quality	Weekday_Sleep_Start	Weekend_Sleep_Start	Weekday_Sleep_End	Weekend_Sleep_End
0	1	24	Other	2nd Year	7.7	7.9	3.4	2	37	10	14.16	4.05	7.41	7.06
1	2	21	Male	1st Year	6.3	6.0	1.9	5	74	2	8.73	7.10	8.21	10.21
2	3	22	Male	4th Year	5.1	6.7	3.9	5	53	5	20.00	20.47	6.88	10.92
3	4	24	Other	4th Year	6.3	8.6	2.8	4	55	9	19.82	4.08	6.69	9.42
4	5	20	Male	4th Year	4.7	2.7	2.7	0	85	3	20.98	6.12	8.98	9.01
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
495	496	24	Male	2nd Year	5.1	9.3	1.9	4	110	4	17.42	8.43	6.93	10.78
496	497	20	Male	2nd Year	8.9	7.7	3.5	3	40	4	1.22	15.54	5.85	7.23
497	498	21	Male	3rd Year	5.7	6.4	3.9	1	68	10	9.94	2.25	5.46	10.72
498	499	18	Female	2nd Year	4.9	0.5	3.5	0	12	2	19.10	15.49	8.35	7.20
499	500	21	Male	3rd Year	7.9	11.6	1.0	0	86	1	7.54	14.12	7.01	9.19