Task 1: Data Cleaning¶
In [3]:
import pandas as pd
In [5]:
df = pd.read_csv('attendance_data.csv')
In [7]:
df.head()
Out[7]:
date | weekno | employee_id | name | status | |
---|---|---|---|---|---|
0 | 01-Apr-22 | W 14 | Atq -458@ | JESSIE CARRILLO | Work From Office |
1 | 01-Apr-22 | W 14 | Atq -400 | chad macias | Work From Office |
2 | 01-Apr-22 | W 14 | Atq -437 | dallas weeks | Work From Office |
3 | 01-Apr-22 | W 14 | Atq -369 | GRETA HORTON | Work From Office |
4 | 01-Apr-22 | W 14 | Atq -374 | KALEY FITZPATRICK | Work From Office |
1. Remove duplicates¶
In [9]:
print("Duplicate Rows Count:" , df.duplicated().sum())
Duplicate Rows Count: 64
In [11]:
print(df[df.duplicated()])
date weekno employee_id name status 388 05-Apr-22 W 15 Atq -374 kaley fitzpatrick Work From Office 389 05-Apr-22 W 15 Atq -470 JOVANI POPE Work From Office 1385 18-Apr-22 W 17 Atq -385 finnegan gilbert Work From Office 1532 20-Apr-22 W 17 Atq -484 demarcus gordon Work From Office 1606 21-Apr-22 W 17 Atq -458 JESSIE CARRILLO Work From Office ... ... ... ... ... ... 6307 26-Jun-22 W 27 Atq -459 leslie navarro Work From Home 6310 26-Jun-22 W 27 Atq -350 ALEXANDER DAVENPORT Work From Home 6312 26-Jun-22 W 27 Atq -348 ZAIDEN WHEELER Work From Home 6313 26-Jun-22 W 27 Atq -350 ALEXANDER DAVENPORT Work From Home 6314 26-Jun-22 W 27 Atq -459 LESLIE NAVARRO Work From Home [64 rows x 5 columns]
In [13]:
pd.set_option('display.max_rows', None) #to see all the rows
In [15]:
print(df[df.duplicated(keep=False)]) #keep = False, to keep duplicates in output
date weekno employee_id name status 312 05-Apr-22 W 15 Atq -374 kaley fitzpatrick Work From Office 313 05-Apr-22 W 15 Atq -470 JOVANI POPE Work From Office 388 05-Apr-22 W 15 Atq -374 kaley fitzpatrick Work From Office 389 05-Apr-22 W 15 Atq -470 JOVANI POPE Work From Office 1310 18-Apr-22 W 17 Atq -385 finnegan gilbert Work From Office 1385 18-Apr-22 W 17 Atq -385 finnegan gilbert Work From Office 1477 20-Apr-22 W 17 Atq -484 demarcus gordon Work From Office 1532 20-Apr-22 W 17 Atq -484 demarcus gordon Work From Office 1575 21-Apr-22 W 17 Atq -458 JESSIE CARRILLO Work From Office 1577 21-Apr-22 W 17 Atq -437 dallas weeks Work From Office 1582 21-Apr-22 W 17 Atq -455 caylee meadows Work From Office 1585 21-Apr-22 W 17 Atq -461 josh sharp Work From Office 1586 21-Apr-22 W 17 Atq -453 NEHEMIAH DIAZ Work From Office 1606 21-Apr-22 W 17 Atq -458 JESSIE CARRILLO Work From Office 1607 21-Apr-22 W 17 Atq -437 dallas weeks Work From Office 1610 21-Apr-22 W 17 Atq -455 caylee meadows Work From Office 1612 21-Apr-22 W 17 Atq -461 josh sharp Work From Office 1613 21-Apr-22 W 17 Atq -453 NEHEMIAH DIAZ Work From Office 1849 25-Apr-22 W 18 Atq -453 nehemiah diaz Work From Office 1924 25-Apr-22 W 18 Atq -453 nehemiah diaz Work From Office 2071 28-Apr-22 W 18 Atq -376 dominique benjamin Work From Office 2072 28-Apr-22 W 18 Atq -418 hillary kirk Work From Office 2073 28-Apr-22 W 18 Atq -373 ISIAH SMALL Work From Office 2142 28-Apr-22 W 18 Atq -376 dominique benjamin Work From Office 2143 28-Apr-22 W 18 Atq -418 hillary kirk Work From Office 2144 28-Apr-22 W 18 Atq -373 ISIAH SMALL Work From Office 2299 02-May-22 W 19 Atq -418 HILLARY KIRK Work From Office 2302 02-May-22 W 19 Atq -418 MARQUIS WYATT Work From Office 2369 02-May-22 W 19 Atq -418 HILLARY KIRK Work From Office 2371 02-May-22 W 19 Atq -418 MARQUIS WYATT Work From Office 2507 05-May-22 W 19 Atq -373 ISIAH SMALL Work From Office 2508 05-May-22 W 19 Atq -408 NICK OROZCO Work From Office 2578 05-May-22 W 19 Atq -373 ISIAH SMALL Work From Office 2579 05-May-22 W 19 Atq -408 NICK OROZCO Work From Office 3049 12-May-22 W 20 Atq -453 nehemiah diaz Work From Office 3050 12-May-22 W 20 Atq -376 dominique benjamin Work From Office 3051 12-May-22 W 20 Atq -418 HILLARY KIRK Work From Office 3113 12-May-22 W 20 Atq -453 nehemiah diaz Work From Office 3114 12-May-22 W 20 Atq -376 dominique benjamin Work From Office 3115 12-May-22 W 20 Atq -418 HILLARY KIRK Work From Office 3363 16-May-22 W 21 Atq -453 NEHEMIAH DIAZ Work From Office 3437 16-May-22 W 21 Atq -453 NEHEMIAH DIAZ Work From Office 3438 17-May-22 W 21 Atq -376 dominique benjamin Work From Office 3513 17-May-22 W 21 Atq -376 dominique benjamin Work From Office 3590 19-May-22 W 21 Atq -418 hillary kirk Work From Office 3591 19-May-22 W 21 Atq -373 isiah small Work From Office 3666 19-May-22 W 21 Atq -418 hillary kirk Work From Office 3667 19-May-22 W 21 Atq -373 isiah small Work From Office 4080 25-May-22 W 22 Atq -418 marquis wyatt Work From Office 4081 25-May-22 W 22 Atq -414 TITUS ANDERSEN Work From Office 4082 25-May-22 W 22 Atq -403 will mahoney Work From Office 4084 25-May-22 W 22 Atq -367 BRILEY ORR Work From Office 4085 25-May-22 W 22 Atq -421 RYLEE MULLINS Work From Office 4086 25-May-22 W 22 Atq -432 RYANN GUERRERO Work From Office 4087 25-May-22 W 22 Atq -484 demarcus gordon Work From Office 4096 25-May-22 W 22 Atq -331 JONAH HAMILTON Work From Office 4135 25-May-22 W 22 Atq -418 marquis wyatt Work From Office 4136 25-May-22 W 22 Atq -414 TITUS ANDERSEN Work From Office 4137 25-May-22 W 22 Atq -403 will mahoney Work From Office 4139 25-May-22 W 22 Atq -367 BRILEY ORR Work From Office 4140 25-May-22 W 22 Atq -421 RYLEE MULLINS Work From Office 4141 25-May-22 W 22 Atq -432 RYANN GUERRERO Work From Office 4142 25-May-22 W 22 Atq -484 demarcus gordon Work From Office 4149 25-May-22 W 22 Atq -331 JONAH HAMILTON Work From Office 4177 26-May-22 W 22 Atq -414 mckayla miles Work From Office 4178 26-May-22 W 22 Atq -461 SHEA ZUNIGA Work From Office 4181 26-May-22 W 22 Atq -458 jessie carrillo Work From Office 4184 26-May-22 W 22 Atq -372 MILEY COMBS Work From Office 4188 26-May-22 W 22 Atq -371 KAYLEIGH KENNEDY Work From Office 4191 26-May-22 W 22 Atq -472 QUINCY MENDEZ Work From Office 4217 26-May-22 W 22 Atq -414 mckayla miles Work From Office 4218 26-May-22 W 22 Atq -461 SHEA ZUNIGA Work From Office 4221 26-May-22 W 22 Atq -458 jessie carrillo Work From Office 4223 26-May-22 W 22 Atq -372 MILEY COMBS Work From Office 4227 26-May-22 W 22 Atq -371 KAYLEIGH KENNEDY Work From Office 4230 26-May-22 W 22 Atq -472 QUINCY MENDEZ Work From Office 4665 01-Jun-22 W 23 Atq -375 hailee fitzpatrick Work From Office 4668 01-Jun-22 W 23 Atq -423 rodrigo price Work From Office 4669 01-Jun-22 W 23 Atq -438 miles fields Work From Office 4673 01-Jun-22 W 23 Atq -419 faith hamilton Work From Office 4676 01-Jun-22 W 23 Atq -343 myla klein Work From Office 4680 01-Jun-22 W 23 Atq -450 miya hampton Work From Office 4700 01-Jun-22 W 23 Atq -375 hailee fitzpatrick Work From Office 4702 01-Jun-22 W 23 Atq -423 rodrigo price Work From Office 4703 01-Jun-22 W 23 Atq -438 miles fields Work From Office 4706 01-Jun-22 W 23 Atq -419 faith hamilton Work From Office 4709 01-Jun-22 W 23 Atq -343 myla klein Work From Office 4712 01-Jun-22 W 23 Atq -450 miya hampton Work From Office 4728 02-Jun-22 W 23 Atq -389 JULIEN HAMILTON Work From Office 4730 02-Jun-22 W 23 Atq -334 charity singleton Work From Office 4731 02-Jun-22 W 23 Atq -452 jaime ritter Work From Office 4733 02-Jun-22 W 23 Atq -476 kane malone Work From Office 4736 02-Jun-22 W 23 Atq -437 dallas weeks Work From Office 4739 02-Jun-22 W 23 Atq -470 jovani pope Work From Office 4776 02-Jun-22 W 23 Atq -389 JULIEN HAMILTON Work From Office 4778 02-Jun-22 W 23 Atq -334 charity singleton Work From Office 4779 02-Jun-22 W 23 Atq -452 jaime ritter Work From Office 4781 02-Jun-22 W 23 Atq -476 kane malone Work From Office 4784 02-Jun-22 W 23 Atq -437 dallas weeks Work From Office 4787 02-Jun-22 W 23 Atq -470 jovani pope Work From Office 5111 07-Jun-22 W 24 Atq -411 samuel murray Work From Office 5187 07-Jun-22 W 24 Atq -411 samuel murray Work From Office 5261 09-Jun-22 W 24 Atq -461 JOSH SHARP Work From Office 5332 09-Jun-22 W 24 Atq -461 JOSH SHARP Work From Office 5649 14-Jun-22 W 25 Atq -418 hillary kirk Work From Office 5723 14-Jun-22 W 25 Atq -418 hillary kirk Work From Office 6094 19-Jun-22 W 26 Atq -374 kaley fitzpatrick Work From Home 6095 19-Jun-22 W 26 Atq -350 ALEXANDER DAVENPORT Work From Home 6096 19-Jun-22 W 26 Atq -459 LESLIE NAVARRO Work From Home 6098 19-Jun-22 W 26 Atq -382 gustavo ritter Work From Home 6101 19-Jun-22 W 26 Atq -348 zaiden wheeler Work From Home 6102 19-Jun-22 W 26 Atq -374 kaley fitzpatrick Work From Home 6103 19-Jun-22 W 26 Atq -350 ALEXANDER DAVENPORT Work From Home 6104 19-Jun-22 W 26 Atq -459 LESLIE NAVARRO Work From Home 6106 19-Jun-22 W 26 Atq -382 gustavo ritter Work From Home 6109 19-Jun-22 W 26 Atq -348 zaiden wheeler Work From Home 6299 26-Jun-22 W 27 Atq -348 zaiden wheeler Work From Home 6300 26-Jun-22 W 27 Atq -350 ALEXANDER DAVENPORT Work From Home 6301 26-Jun-22 W 27 Atq -459 leslie navarro Work From Home 6305 26-Jun-22 W 27 Atq -348 zaiden wheeler Work From Home 6307 26-Jun-22 W 27 Atq -459 leslie navarro Work From Home 6309 26-Jun-22 W 27 Atq -348 ZAIDEN WHEELER Work From Home 6310 26-Jun-22 W 27 Atq -350 ALEXANDER DAVENPORT Work From Home 6311 26-Jun-22 W 27 Atq -459 LESLIE NAVARRO Work From Home 6312 26-Jun-22 W 27 Atq -348 ZAIDEN WHEELER Work From Home 6313 26-Jun-22 W 27 Atq -350 ALEXANDER DAVENPORT Work From Home 6314 26-Jun-22 W 27 Atq -459 LESLIE NAVARRO Work From Home
In [17]:
df_clean = df.drop_duplicates() #deleted duplicates and stored the clean data in to df_clean
2. Formatting the Dates¶
In [19]:
df_clean.describe()
Out[19]:
date | weekno | employee_id | name | status | |
---|---|---|---|---|---|
count | 6272 | 6272 | 6272 | 6272 | 6272 |
unique | 90 | 14 | 106 | 198 | 7 |
top | 07-May-22 | W 22 | Atq -419 | iris woodard | Work From Office |
freq | 86 | 551 | 161 | 57 | 3566 |
In [21]:
df_clean['date'].dtype
Out[21]:
dtype('O')
In [23]:
df_clean = df_clean.copy()
In [25]:
df_clean['date'] = pd.to_datetime(df_clean['date'])
/var/folders/v_/qq_q29jj67nc57x6ldwhjsqm0000gn/T/ipykernel_3021/4103588557.py:1: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format. df_clean['date'] = pd.to_datetime(df_clean['date'])
In [29]:
df_clean['date'].dtype
Out[29]:
dtype('<M8[ns]')
In [31]:
print(df_clean.head())
date weekno employee_id name status 0 2022-04-01 W 14 Atq -458@ JESSIE CARRILLO Work From Office 1 2022-04-01 W 14 Atq -400 chad macias Work From Office 2 2022-04-01 W 14 Atq -437 dallas weeks Work From Office 3 2022-04-01 W 14 Atq -369 GRETA HORTON Work From Office 4 2022-04-01 W 14 Atq -374 KALEY FITZPATRICK Work From Office
In [33]:
df_clean['month_name'] = df_clean['date'].dt.strftime('%B')
print(df_clean.head())
date weekno employee_id name status \ 0 2022-04-01 W 14 Atq -458@ JESSIE CARRILLO Work From Office 1 2022-04-01 W 14 Atq -400 chad macias Work From Office 2 2022-04-01 W 14 Atq -437 dallas weeks Work From Office 3 2022-04-01 W 14 Atq -369 GRETA HORTON Work From Office 4 2022-04-01 W 14 Atq -374 KALEY FITZPATRICK Work From Office month_name 0 April 1 April 2 April 3 April 4 April
In [35]:
df_clean['day_type'] = df_clean['date'].dt.dayofweek.apply(lambda x: 'Weekend' if x >= 5 else 'Weekday')
print(df_clean.head())
date weekno employee_id name status \ 0 2022-04-01 W 14 Atq -458@ JESSIE CARRILLO Work From Office 1 2022-04-01 W 14 Atq -400 chad macias Work From Office 2 2022-04-01 W 14 Atq -437 dallas weeks Work From Office 3 2022-04-01 W 14 Atq -369 GRETA HORTON Work From Office 4 2022-04-01 W 14 Atq -374 KALEY FITZPATRICK Work From Office month_name day_type 0 April Weekday 1 April Weekday 2 April Weekday 3 April Weekday 4 April Weekday
3. Remove extra characters¶
In [37]:
df_clean['employee_id'] = df_clean['employee_id'].str.rstrip('@')
In [39]:
print(df_clean.head(40))
date weekno employee_id name status \ 0 2022-04-01 W 14 Atq -458 JESSIE CARRILLO Work From Office 1 2022-04-01 W 14 Atq -400 chad macias Work From Office 2 2022-04-01 W 14 Atq -437 dallas weeks Work From Office 3 2022-04-01 W 14 Atq -369 GRETA HORTON Work From Office 4 2022-04-01 W 14 Atq -374 KALEY FITZPATRICK Work From Office 5 2022-04-01 W 14 Atq -470 JOVANI POPE Work From Office 6 2022-04-01 W 14 Atq -455 caylee meadows Work From Office 7 2022-04-01 W 14 Atq -385 FINNEGAN GILBERT Work From Office 8 2022-04-01 W 14 Atq -387 KONNER HENSON Work From Office 9 2022-04-01 W 14 Atq -443 andrew cummings Work From Office 10 2022-04-01 W 14 Atq -453 NEHEMIAH DIAZ Work From Office 11 2022-04-01 W 14 Atq -376 DOMINIQUE BENJAMIN Work From Office 12 2022-04-01 W 14 Atq -418 hillary kirk Work From Office 13 2022-04-01 W 14 Atq -373 isiah small Work From Office 14 2022-04-01 W 14 Atq -418 marquis wyatt Work From Office 15 2022-04-01 W 14 Atq -414 TITUS ANDERSEN Work From Office 16 2022-04-01 W 14 Atq -403 WILL MAHONEY Work From Office 17 2022-04-01 W 14 Atq -393 emma freeman Work From Office 18 2022-04-01 W 14 Atq -367 BRILEY ORR Work From Office 19 2022-04-01 W 14 Atq -421 rylee mullins Work From Office 20 2022-04-01 W 14 Atq -432 RYANN GUERRERO Work From Office 21 2022-04-01 W 14 Atq -484 DEMARCUS GORDON Work From Office 22 2022-04-01 W 14 Atq -428 CIARA ALLISON Work From Office 23 2022-04-01 W 14 Atq -467 ESPERANZA IRWIN Work From Office 24 2022-04-01 W 14 Atq -396 KARMA REEVES Work From Office 25 2022-04-01 W 14 Atq -472 cristofer bernard Work From Office 26 2022-04-01 W 14 Atq -412 TUCKER AUSTIN Work From Office 27 2022-04-01 W 14 Atq -398 salvatore hendrix Work From Office 28 2022-04-01 W 14 Atq -484 adyson moyer Work From Office 29 2022-04-01 W 14 Atq -385 JASE INGRAM Work From Office 30 2022-04-01 W 14 Atq -456 adriel pace Work From Office 31 2022-04-01 W 14 Atq -375 kenley marsh Work From Office 32 2022-04-01 W 14 Atq -475 LONDON KIM Work From Office 33 2022-04-01 W 14 Atq -435 dean mclaughlin Work From Office 34 2022-04-01 W 14 Atq -348 weston horton Work From Office 35 2022-04-01 W 14 Atq -468 xiomara ruiz Work From Office 36 2022-04-01 W 14 Atq -368 macie callahan Work From Office 37 2022-04-01 W 14 Atq -475 rey novak Work From Office 38 2022-04-01 W 14 Atq -375 hailee fitzpatrick Work From Office 39 2022-04-01 W 14 Atq -433 JAYCE FUENTES Work From Office month_name day_type 0 April Weekday 1 April Weekday 2 April Weekday 3 April Weekday 4 April Weekday 5 April Weekday 6 April Weekday 7 April Weekday 8 April Weekday 9 April Weekday 10 April Weekday 11 April Weekday 12 April Weekday 13 April Weekday 14 April Weekday 15 April Weekday 16 April Weekday 17 April Weekday 18 April Weekday 19 April Weekday 20 April Weekday 21 April Weekday 22 April Weekday 23 April Weekday 24 April Weekday 25 April Weekday 26 April Weekday 27 April Weekday 28 April Weekday 29 April Weekday 30 April Weekday 31 April Weekday 32 April Weekday 33 April Weekday 34 April Weekday 35 April Weekday 36 April Weekday 37 April Weekday 38 April Weekday 39 April Weekday
In [105]:
#again checking for duplicates
In [41]:
print(df_clean[df_clean.duplicated(keep=False)])
Empty DataFrame Columns: [date, weekno, employee_id, name, status, month_name, day_type] Index: []
Capitalise the names¶
In [43]:
df_clean['name'] = df_clean['name'].str.title()
In [45]:
print(df_clean.head())
date weekno employee_id name status \ 0 2022-04-01 W 14 Atq -458 Jessie Carrillo Work From Office 1 2022-04-01 W 14 Atq -400 Chad Macias Work From Office 2 2022-04-01 W 14 Atq -437 Dallas Weeks Work From Office 3 2022-04-01 W 14 Atq -369 Greta Horton Work From Office 4 2022-04-01 W 14 Atq -374 Kaley Fitzpatrick Work From Office month_name day_type 0 April Weekday 1 April Weekday 2 April Weekday 3 April Weekday 4 April Weekday
In [47]:
# Function to create alias from status string
def create_alias(status):
if pd.isna(status):
return ""
# Split status by space, take first letter of each word, join together as uppercase
return ''.join(word[0].upper() for word in status.split())
# Apply function to status column to create new aliases column
df_clean['aliases'] = df_clean['status'].apply(create_alias)
print(df_clean.head())
date weekno employee_id name status \ 0 2022-04-01 W 14 Atq -458 Jessie Carrillo Work From Office 1 2022-04-01 W 14 Atq -400 Chad Macias Work From Office 2 2022-04-01 W 14 Atq -437 Dallas Weeks Work From Office 3 2022-04-01 W 14 Atq -369 Greta Horton Work From Office 4 2022-04-01 W 14 Atq -374 Kaley Fitzpatrick Work From Office month_name day_type aliases 0 April Weekday WFO 1 April Weekday WFO 2 April Weekday WFO 3 April Weekday WFO 4 April Weekday WFO
In [49]:
df_clean['name'] = df_clean['name'].str.strip()
In [51]:
df['name'] = df['name'].str.replace(r'[^\x00-\x7F]+', '', regex=True)
In [87]:
### Droping Duplicates after cleaning the data
In [71]:
print(df_clean[df_clean.duplicated(keep=False)])
date weekno employee_id name status \ 1006 2022-04-14 W 16 Atq -470 Jovani Pope Work From Office 1008 2022-04-14 W 16 Atq -385 Finnegan Gilbert Work From Office 1077 2022-04-14 W 16 Atq -470 Jovani Pope Work From Office 1078 2022-04-14 W 16 Atq -385 Finnegan Gilbert Work From Office 1309 2022-04-18 W 17 Atq -455 Caylee Meadows Work From Office 1384 2022-04-18 W 17 Atq -455 Caylee Meadows Work From Office 1475 2022-04-20 W 17 Atq -421 Rylee Mullins Work From Office 1476 2022-04-20 W 17 Atq -432 Ryann Guerrero Work From Office 1478 2022-04-20 W 17 Atq -428 Ciara Allison Work From Office 1479 2022-04-20 W 17 Atq -467 Esperanza Irwin Work From Office 1480 2022-04-20 W 17 Atq -396 Karma Reeves Work From Office 1481 2022-04-20 W 17 Atq -472 Cristofer Bernard Work From Office 1482 2022-04-20 W 17 Atq -412 Tucker Austin Work From Office 1530 2022-04-20 W 17 Atq -421 Rylee Mullins Work From Office 1531 2022-04-20 W 17 Atq -432 Ryann Guerrero Work From Office 1533 2022-04-20 W 17 Atq -428 Ciara Allison Work From Office 1534 2022-04-20 W 17 Atq -467 Esperanza Irwin Work From Office 1535 2022-04-20 W 17 Atq -396 Karma Reeves Work From Office 1536 2022-04-20 W 17 Atq -472 Cristofer Bernard Work From Office 1537 2022-04-20 W 17 Atq -412 Tucker Austin Work From Office 1578 2022-04-21 W 17 Atq -372 Miley Combs Work From Office 1579 2022-04-21 W 17 Atq -369 Greta Horton Work From Office 1584 2022-04-21 W 17 Atq -387 Konner Henson Work From Office 1608 2022-04-21 W 17 Atq -372 Miley Combs Work From Office 1609 2022-04-21 W 17 Atq -369 Greta Horton Work From Office 1611 2022-04-21 W 17 Atq -387 Konner Henson Work From Office 2300 2022-05-02 W 19 Atq -373 Isiah Small Work From Office 2370 2022-05-02 W 19 Atq -373 Isiah Small Work From Office 3048 2022-05-12 W 20 Atq -461 Josh Sharp Work From Office 3052 2022-05-12 W 20 Atq -373 Isiah Small Work From Office 3053 2022-05-12 W 20 Atq -418 Marquis Wyatt Work From Office 3112 2022-05-12 W 20 Atq -461 Josh Sharp Work From Office 3116 2022-05-12 W 20 Atq -373 Isiah Small Work From Office 3117 2022-05-12 W 20 Atq -418 Marquis Wyatt Work From Office 3362 2022-05-16 W 21 Atq -461 Josh Sharp Work From Office 3436 2022-05-16 W 21 Atq -461 Josh Sharp Work From Office 3916 2022-05-23 W 22 Atq -411 Samuel Murray Work From Office 3995 2022-05-23 W 22 Atq -411 Samuel Murray Work From Office 4083 2022-05-25 W 22 Atq -393 Emma Freeman Work From Office 4088 2022-05-25 W 22 Atq -467 Esperanza Irwin Work From Office 4089 2022-05-25 W 22 Atq -396 Karma Reeves Work From Office 4091 2022-05-25 W 22 Atq -412 Tucker Austin Work From Office 4092 2022-05-25 W 22 Atq -385 Jase Ingram Work From Office 4093 2022-05-25 W 22 Atq -456 Adriel Pace Work From Office 4094 2022-05-25 W 22 Atq -475 London Kim Work From Office 4138 2022-05-25 W 22 Atq -393 Emma Freeman Work From Office 4143 2022-05-25 W 22 Atq -467 Esperanza Irwin Work From Office 4144 2022-05-25 W 22 Atq -396 Karma Reeves Work From Office 4145 2022-05-25 W 22 Atq -412 Tucker Austin Work From Office 4146 2022-05-25 W 22 Atq -385 Jase Ingram Work From Office 4147 2022-05-25 W 22 Atq -456 Adriel Pace Work From Office 4148 2022-05-25 W 22 Atq -475 London Kim Work From Office 4176 2022-05-26 W 22 Atq -389 Julien Hamilton Work From Office 4179 2022-05-26 W 22 Atq -426 Isabella Pittman Work From Office 4180 2022-05-26 W 22 Atq -362 Kiana Mcmillan Work From Office 4182 2022-05-26 W 22 Atq -400 Chad Macias Work From Office 4185 2022-05-26 W 22 Atq -374 Kaley Fitzpatrick Work From Office 4186 2022-05-26 W 22 Atq -470 Jovani Pope Work From Office 4187 2022-05-26 W 22 Atq -387 Konner Henson Work From Office 4189 2022-05-26 W 22 Atq -364 Keaton Nixon Work From Office 4190 2022-05-26 W 22 Atq -448 Athena Rios Work From Office 4216 2022-05-26 W 22 Atq -389 Julien Hamilton Work From Office 4219 2022-05-26 W 22 Atq -426 Isabella Pittman Work From Office 4220 2022-05-26 W 22 Atq -362 Kiana Mcmillan Work From Office 4222 2022-05-26 W 22 Atq -400 Chad Macias Work From Office 4224 2022-05-26 W 22 Atq -374 Kaley Fitzpatrick Work From Office 4225 2022-05-26 W 22 Atq -470 Jovani Pope Work From Office 4226 2022-05-26 W 22 Atq -387 Konner Henson Work From Office 4228 2022-05-26 W 22 Atq -364 Keaton Nixon Work From Office 4229 2022-05-26 W 22 Atq -448 Athena Rios Work From Office 4666 2022-06-01 W 23 Atq -433 Jayce Fuentes Work From Office 4671 2022-06-01 W 23 Atq -459 Iris Woodard Work From Office 4672 2022-06-01 W 23 Atq -396 Joel Cruz Work From Office 4674 2022-06-01 W 23 Atq -446 April Ayers Work From Office 4675 2022-06-01 W 23 Atq -334 Boston Morse Work From Office 4678 2022-06-01 W 23 Atq -440 Mckayla Parker Work From Office 4679 2022-06-01 W 23 Atq -405 Sofia Solis Work From Office 4681 2022-06-01 W 23 Atq -467 Cason David Work From Office 4682 2022-06-01 W 23 Atq -419 Kaylah Schultz Work From Office 4701 2022-06-01 W 23 Atq -433 Jayce Fuentes Work From Office 4704 2022-06-01 W 23 Atq -459 Iris Woodard Work From Office 4705 2022-06-01 W 23 Atq -396 Joel Cruz Work From Office 4707 2022-06-01 W 23 Atq -446 April Ayers Work From Office 4708 2022-06-01 W 23 Atq -334 Boston Morse Work From Office 4710 2022-06-01 W 23 Atq -440 Mckayla Parker Work From Office 4711 2022-06-01 W 23 Atq -405 Sofia Solis Work From Office 4713 2022-06-01 W 23 Atq -467 Cason David Work From Office 4714 2022-06-01 W 23 Atq -419 Kaylah Schultz Work From Office 4727 2022-06-02 W 23 Atq -385 Alyson Huber Work From Office 4729 2022-06-02 W 23 Atq -414 Mckayla Miles Work From Office 4732 2022-06-02 W 23 Atq -362 Kiana Mcmillan Work From Office 4734 2022-06-02 W 23 Atq -455 Aditya Walls Work From Office 4735 2022-06-02 W 23 Atq -458 Jessie Carrillo Work From Office 4737 2022-06-02 W 23 Atq -372 Miley Combs Work From Office 4738 2022-06-02 W 23 Atq -374 Kaley Fitzpatrick Work From Office 4740 2022-06-02 W 23 Atq -401 Piper Carroll Work From Office 4741 2022-06-02 W 23 Atq -371 Kayleigh Kennedy Work From Office 4775 2022-06-02 W 23 Atq -385 Alyson Huber Work From Office 4777 2022-06-02 W 23 Atq -414 Mckayla Miles Work From Office 4780 2022-06-02 W 23 Atq -362 Kiana Mcmillan Work From Office 4782 2022-06-02 W 23 Atq -455 Aditya Walls Work From Office 4783 2022-06-02 W 23 Atq -458 Jessie Carrillo Work From Office 4785 2022-06-02 W 23 Atq -372 Miley Combs Work From Office 4786 2022-06-02 W 23 Atq -374 Kaley Fitzpatrick Work From Office 4788 2022-06-02 W 23 Atq -401 Piper Carroll Work From Office 4789 2022-06-02 W 23 Atq -371 Kayleigh Kennedy Work From Office 5034 2022-06-06 W 24 Atq -472 Quincy Mendez Work From Office 5110 2022-06-06 W 24 Atq -472 Quincy Mendez Work From Office 5262 2022-06-09 W 24 Atq -376 Dominique Benjamin Work From Office 5333 2022-06-09 W 24 Atq -376 Dominique Benjamin Work From Office 5571 2022-06-13 W 25 Atq -376 Dominique Benjamin Work From Office 5648 2022-06-13 W 25 Atq -376 Dominique Benjamin Work From Office 6093 2022-06-19 W 26 Atq -348 Zaiden Wheeler Work From Home 6094 2022-06-19 W 26 Atq -374 Kaley Fitzpatrick Work From Home 6097 2022-06-19 W 26 Atq -361 Makai Hardy Work From Home 6099 2022-06-19 W 26 Atq -398 Bo Cordova Work From Home 6101 2022-06-19 W 26 Atq -348 Zaiden Wheeler Work From Home 6105 2022-06-19 W 26 Atq -361 Makai Hardy Work From Home 6107 2022-06-19 W 26 Atq -398 Bo Cordova Work From Home 6110 2022-06-19 W 26 Atq -374 Kaley Fitzpatrick Work From Home 6299 2022-06-26 W 27 Atq -348 Zaiden Wheeler Work From Home 6300 2022-06-26 W 27 Atq -350 Alexander Davenport Work From Home 6301 2022-06-26 W 27 Atq -459 Leslie Navarro Work From Home 6302 2022-06-26 W 27 Atq -398 Bo Cordova Work From Home 6306 2022-06-26 W 27 Atq -350 Alexander Davenport Work From Home 6308 2022-06-26 W 27 Atq -398 Bo Cordova Work From Home 6309 2022-06-26 W 27 Atq -348 Zaiden Wheeler Work From Home 6311 2022-06-26 W 27 Atq -459 Leslie Navarro Work From Home month_name day_type aliases 1006 April Weekday WFO 1008 April Weekday WFO 1077 April Weekday WFO 1078 April Weekday WFO 1309 April Weekday WFO 1384 April Weekday WFO 1475 April Weekday WFO 1476 April Weekday WFO 1478 April Weekday WFO 1479 April Weekday WFO 1480 April Weekday WFO 1481 April Weekday WFO 1482 April Weekday WFO 1530 April Weekday WFO 1531 April Weekday WFO 1533 April Weekday WFO 1534 April Weekday WFO 1535 April Weekday WFO 1536 April Weekday WFO 1537 April Weekday WFO 1578 April Weekday WFO 1579 April Weekday WFO 1584 April Weekday WFO 1608 April Weekday WFO 1609 April Weekday WFO 1611 April Weekday WFO 2300 May Weekday WFO 2370 May Weekday WFO 3048 May Weekday WFO 3052 May Weekday WFO 3053 May Weekday WFO 3112 May Weekday WFO 3116 May Weekday WFO 3117 May Weekday WFO 3362 May Weekday WFO 3436 May Weekday WFO 3916 May Weekday WFO 3995 May Weekday WFO 4083 May Weekday WFO 4088 May Weekday WFO 4089 May Weekday WFO 4091 May Weekday WFO 4092 May Weekday WFO 4093 May Weekday WFO 4094 May Weekday WFO 4138 May Weekday WFO 4143 May Weekday WFO 4144 May Weekday WFO 4145 May Weekday WFO 4146 May Weekday WFO 4147 May Weekday WFO 4148 May Weekday WFO 4176 May Weekday WFO 4179 May Weekday WFO 4180 May Weekday WFO 4182 May Weekday WFO 4185 May Weekday WFO 4186 May Weekday WFO 4187 May Weekday WFO 4189 May Weekday WFO 4190 May Weekday WFO 4216 May Weekday WFO 4219 May Weekday WFO 4220 May Weekday WFO 4222 May Weekday WFO 4224 May Weekday WFO 4225 May Weekday WFO 4226 May Weekday WFO 4228 May Weekday WFO 4229 May Weekday WFO 4666 June Weekday WFO 4671 June Weekday WFO 4672 June Weekday WFO 4674 June Weekday WFO 4675 June Weekday WFO 4678 June Weekday WFO 4679 June Weekday WFO 4681 June Weekday WFO 4682 June Weekday WFO 4701 June Weekday WFO 4704 June Weekday WFO 4705 June Weekday WFO 4707 June Weekday WFO 4708 June Weekday WFO 4710 June Weekday WFO 4711 June Weekday WFO 4713 June Weekday WFO 4714 June Weekday WFO 4727 June Weekday WFO 4729 June Weekday WFO 4732 June Weekday WFO 4734 June Weekday WFO 4735 June Weekday WFO 4737 June Weekday WFO 4738 June Weekday WFO 4740 June Weekday WFO 4741 June Weekday WFO 4775 June Weekday WFO 4777 June Weekday WFO 4780 June Weekday WFO 4782 June Weekday WFO 4783 June Weekday WFO 4785 June Weekday WFO 4786 June Weekday WFO 4788 June Weekday WFO 4789 June Weekday WFO 5034 June Weekday WFO 5110 June Weekday WFO 5262 June Weekday WFO 5333 June Weekday WFO 5571 June Weekday WFO 5648 June Weekday WFO 6093 June Weekend WFH 6094 June Weekend WFH 6097 June Weekend WFH 6099 June Weekend WFH 6101 June Weekend WFH 6105 June Weekend WFH 6107 June Weekend WFH 6110 June Weekend WFH 6299 June Weekend WFH 6300 June Weekend WFH 6301 June Weekend WFH 6302 June Weekend WFH 6306 June Weekend WFH 6308 June Weekend WFH 6309 June Weekend WFH 6311 June Weekend WFH
In [75]:
df_clean = df_clean.drop_duplicates()
Task 2: Ad Hoc aka Analysis¶
In [77]:
# 1. What is the total count of distinct employee names within the dataset?
distinct_name_count = df_clean['name'].nunique()
print("Total distinct employee names:", distinct_name_count)
Total distinct employee names: 99
In [79]:
# 2. Calculate the work-from-home (WFH %) percentage in the month of May.
may_data = df_clean[df_clean['date'].dt.strftime('%m') == '05']
wfh_may = len(may_data[may_data['status'] == "Work From Home"])
print("WFH count in May: ", wfh_may)
WFH count in May: 173
In [81]:
# Count WFH days in May
wfh_days = (may_data['status'] == "Work From Home").sum()
# Count WFO days in May
wfo_days = (may_data['status'] == "Work From Office").sum()
# Total present days = WFH + WFO
total_present_days = wfh_days + wfo_days
print("Total working days in may: ",total_present_days)
Total working days in may: 1506
In [85]:
wfh_percentage1 = (wfh_may / total_present_days) * 100 if total_present_days > 0 else 0
print(f"WFH % in May: {wfh_percentage1:.2f}%")
WFH % in May: 11.49%
In [61]:
# Filter rows where status is WFH or WFO
present_data = may_data[
(may_data['status'] == "Work From Home") |
(may_data['status'] == "Work From Office")
]
# Export all present-day rows to CSV
present_data.to_csv("total_present_days.csv", index=False)
print("CSV file saved as total_present_days.csv with all present-day rows")
CSV file saved as total_present_days.csv with all present-day rows
In [ ]: