Task 1: Data Cleaning¶

In [3]:
import pandas as pd
In [5]:
df = pd.read_csv('attendance_data.csv')
In [7]:
df.head()
Out[7]:
date weekno employee_id name status
0 01-Apr-22 W 14 Atq -458@ JESSIE CARRILLO Work From Office
1 01-Apr-22 W 14 Atq -400 chad macias Work From Office
2 01-Apr-22 W 14 Atq -437 dallas weeks Work From Office
3 01-Apr-22 W 14 Atq -369 GRETA HORTON Work From Office
4 01-Apr-22 W 14 Atq -374 KALEY FITZPATRICK Work From Office

1. Remove duplicates¶

In [9]:
print("Duplicate Rows Count:" , df.duplicated().sum())
Duplicate Rows Count: 64
In [11]:
print(df[df.duplicated()])
           date weekno employee_id                 name            status
388   05-Apr-22   W 15    Atq -374    kaley fitzpatrick  Work From Office
389   05-Apr-22   W 15    Atq -470          JOVANI POPE  Work From Office
1385  18-Apr-22   W 17    Atq -385     finnegan gilbert  Work From Office
1532  20-Apr-22   W 17    Atq -484      demarcus gordon  Work From Office
1606  21-Apr-22   W 17    Atq -458      JESSIE CARRILLO  Work From Office
...         ...    ...         ...                  ...               ...
6307  26-Jun-22   W 27    Atq -459       leslie navarro    Work From Home
6310  26-Jun-22   W 27    Atq -350  ALEXANDER DAVENPORT    Work From Home
6312  26-Jun-22   W 27    Atq -348       ZAIDEN WHEELER    Work From Home
6313  26-Jun-22   W 27    Atq -350  ALEXANDER DAVENPORT    Work From Home
6314  26-Jun-22   W 27    Atq -459       LESLIE NAVARRO    Work From Home

[64 rows x 5 columns]
In [13]:
pd.set_option('display.max_rows', None) #to see all the rows
In [15]:
print(df[df.duplicated(keep=False)]) #keep = False, to keep duplicates in output
           date weekno employee_id                 name            status
312   05-Apr-22   W 15    Atq -374    kaley fitzpatrick  Work From Office
313   05-Apr-22   W 15    Atq -470          JOVANI POPE  Work From Office
388   05-Apr-22   W 15    Atq -374    kaley fitzpatrick  Work From Office
389   05-Apr-22   W 15    Atq -470          JOVANI POPE  Work From Office
1310  18-Apr-22   W 17    Atq -385     finnegan gilbert  Work From Office
1385  18-Apr-22   W 17    Atq -385     finnegan gilbert  Work From Office
1477  20-Apr-22   W 17    Atq -484      demarcus gordon  Work From Office
1532  20-Apr-22   W 17    Atq -484      demarcus gordon  Work From Office
1575  21-Apr-22   W 17    Atq -458      JESSIE CARRILLO  Work From Office
1577  21-Apr-22   W 17    Atq -437         dallas weeks  Work From Office
1582  21-Apr-22   W 17    Atq -455       caylee meadows  Work From Office
1585  21-Apr-22   W 17    Atq -461           josh sharp  Work From Office
1586  21-Apr-22   W 17    Atq -453        NEHEMIAH DIAZ  Work From Office
1606  21-Apr-22   W 17    Atq -458      JESSIE CARRILLO  Work From Office
1607  21-Apr-22   W 17    Atq -437         dallas weeks  Work From Office
1610  21-Apr-22   W 17    Atq -455       caylee meadows  Work From Office
1612  21-Apr-22   W 17    Atq -461           josh sharp  Work From Office
1613  21-Apr-22   W 17    Atq -453        NEHEMIAH DIAZ  Work From Office
1849  25-Apr-22   W 18    Atq -453        nehemiah diaz  Work From Office
1924  25-Apr-22   W 18    Atq -453        nehemiah diaz  Work From Office
2071  28-Apr-22   W 18    Atq -376   dominique benjamin  Work From Office
2072  28-Apr-22   W 18    Atq -418         hillary kirk  Work From Office
2073  28-Apr-22   W 18    Atq -373          ISIAH SMALL  Work From Office
2142  28-Apr-22   W 18    Atq -376   dominique benjamin  Work From Office
2143  28-Apr-22   W 18    Atq -418         hillary kirk  Work From Office
2144  28-Apr-22   W 18    Atq -373          ISIAH SMALL  Work From Office
2299  02-May-22   W 19    Atq -418         HILLARY KIRK  Work From Office
2302  02-May-22   W 19    Atq -418        MARQUIS WYATT  Work From Office
2369  02-May-22   W 19    Atq -418         HILLARY KIRK  Work From Office
2371  02-May-22   W 19    Atq -418        MARQUIS WYATT  Work From Office
2507  05-May-22   W 19    Atq -373          ISIAH SMALL  Work From Office
2508  05-May-22   W 19    Atq -408          NICK OROZCO  Work From Office
2578  05-May-22   W 19    Atq -373          ISIAH SMALL  Work From Office
2579  05-May-22   W 19    Atq -408          NICK OROZCO  Work From Office
3049  12-May-22   W 20    Atq -453        nehemiah diaz  Work From Office
3050  12-May-22   W 20    Atq -376   dominique benjamin  Work From Office
3051  12-May-22   W 20    Atq -418         HILLARY KIRK  Work From Office
3113  12-May-22   W 20    Atq -453        nehemiah diaz  Work From Office
3114  12-May-22   W 20    Atq -376   dominique benjamin  Work From Office
3115  12-May-22   W 20    Atq -418         HILLARY KIRK  Work From Office
3363  16-May-22   W 21    Atq -453        NEHEMIAH DIAZ  Work From Office
3437  16-May-22   W 21    Atq -453        NEHEMIAH DIAZ  Work From Office
3438  17-May-22   W 21    Atq -376   dominique benjamin  Work From Office
3513  17-May-22   W 21    Atq -376   dominique benjamin  Work From Office
3590  19-May-22   W 21    Atq -418         hillary kirk  Work From Office
3591  19-May-22   W 21    Atq -373          isiah small  Work From Office
3666  19-May-22   W 21    Atq -418         hillary kirk  Work From Office
3667  19-May-22   W 21    Atq -373          isiah small  Work From Office
4080  25-May-22   W 22    Atq -418        marquis wyatt  Work From Office
4081  25-May-22   W 22    Atq -414       TITUS ANDERSEN  Work From Office
4082  25-May-22   W 22    Atq -403         will mahoney  Work From Office
4084  25-May-22   W 22    Atq -367           BRILEY ORR  Work From Office
4085  25-May-22   W 22    Atq -421        RYLEE MULLINS  Work From Office
4086  25-May-22   W 22    Atq -432       RYANN GUERRERO  Work From Office
4087  25-May-22   W 22    Atq -484      demarcus gordon  Work From Office
4096  25-May-22   W 22    Atq -331       JONAH HAMILTON  Work From Office
4135  25-May-22   W 22    Atq -418        marquis wyatt  Work From Office
4136  25-May-22   W 22    Atq -414       TITUS ANDERSEN  Work From Office
4137  25-May-22   W 22    Atq -403         will mahoney  Work From Office
4139  25-May-22   W 22    Atq -367           BRILEY ORR  Work From Office
4140  25-May-22   W 22    Atq -421        RYLEE MULLINS  Work From Office
4141  25-May-22   W 22    Atq -432       RYANN GUERRERO  Work From Office
4142  25-May-22   W 22    Atq -484      demarcus gordon  Work From Office
4149  25-May-22   W 22    Atq -331       JONAH HAMILTON  Work From Office
4177  26-May-22   W 22    Atq -414        mckayla miles  Work From Office
4178  26-May-22   W 22    Atq -461          SHEA ZUNIGA  Work From Office
4181  26-May-22   W 22    Atq -458      jessie carrillo  Work From Office
4184  26-May-22   W 22    Atq -372          MILEY COMBS  Work From Office
4188  26-May-22   W 22    Atq -371     KAYLEIGH KENNEDY  Work From Office
4191  26-May-22   W 22    Atq -472        QUINCY MENDEZ  Work From Office
4217  26-May-22   W 22    Atq -414        mckayla miles  Work From Office
4218  26-May-22   W 22    Atq -461          SHEA ZUNIGA  Work From Office
4221  26-May-22   W 22    Atq -458      jessie carrillo  Work From Office
4223  26-May-22   W 22    Atq -372          MILEY COMBS  Work From Office
4227  26-May-22   W 22    Atq -371     KAYLEIGH KENNEDY  Work From Office
4230  26-May-22   W 22    Atq -472        QUINCY MENDEZ  Work From Office
4665  01-Jun-22   W 23    Atq -375   hailee fitzpatrick  Work From Office
4668  01-Jun-22   W 23    Atq -423        rodrigo price  Work From Office
4669  01-Jun-22   W 23    Atq -438         miles fields  Work From Office
4673  01-Jun-22   W 23    Atq -419       faith hamilton  Work From Office
4676  01-Jun-22   W 23    Atq -343           myla klein  Work From Office
4680  01-Jun-22   W 23    Atq -450         miya hampton  Work From Office
4700  01-Jun-22   W 23    Atq -375   hailee fitzpatrick  Work From Office
4702  01-Jun-22   W 23    Atq -423        rodrigo price  Work From Office
4703  01-Jun-22   W 23    Atq -438         miles fields  Work From Office
4706  01-Jun-22   W 23    Atq -419       faith hamilton  Work From Office
4709  01-Jun-22   W 23    Atq -343           myla klein  Work From Office
4712  01-Jun-22   W 23    Atq -450         miya hampton  Work From Office
4728  02-Jun-22   W 23    Atq -389      JULIEN HAMILTON  Work From Office
4730  02-Jun-22   W 23    Atq -334    charity singleton  Work From Office
4731  02-Jun-22   W 23    Atq -452         jaime ritter  Work From Office
4733  02-Jun-22   W 23    Atq -476          kane malone  Work From Office
4736  02-Jun-22   W 23    Atq -437         dallas weeks  Work From Office
4739  02-Jun-22   W 23    Atq -470          jovani pope  Work From Office
4776  02-Jun-22   W 23    Atq -389      JULIEN HAMILTON  Work From Office
4778  02-Jun-22   W 23    Atq -334    charity singleton  Work From Office
4779  02-Jun-22   W 23    Atq -452         jaime ritter  Work From Office
4781  02-Jun-22   W 23    Atq -476          kane malone  Work From Office
4784  02-Jun-22   W 23    Atq -437         dallas weeks  Work From Office
4787  02-Jun-22   W 23    Atq -470          jovani pope  Work From Office
5111  07-Jun-22   W 24    Atq -411        samuel murray  Work From Office
5187  07-Jun-22   W 24    Atq -411        samuel murray  Work From Office
5261  09-Jun-22   W 24    Atq -461           JOSH SHARP  Work From Office
5332  09-Jun-22   W 24    Atq -461           JOSH SHARP  Work From Office
5649  14-Jun-22   W 25    Atq -418         hillary kirk  Work From Office
5723  14-Jun-22   W 25    Atq -418         hillary kirk  Work From Office
6094  19-Jun-22   W 26    Atq -374    kaley fitzpatrick    Work From Home
6095  19-Jun-22   W 26    Atq -350  ALEXANDER DAVENPORT    Work From Home
6096  19-Jun-22   W 26    Atq -459       LESLIE NAVARRO    Work From Home
6098  19-Jun-22   W 26    Atq -382       gustavo ritter    Work From Home
6101  19-Jun-22   W 26    Atq -348       zaiden wheeler    Work From Home
6102  19-Jun-22   W 26    Atq -374    kaley fitzpatrick    Work From Home
6103  19-Jun-22   W 26    Atq -350  ALEXANDER DAVENPORT    Work From Home
6104  19-Jun-22   W 26    Atq -459       LESLIE NAVARRO    Work From Home
6106  19-Jun-22   W 26    Atq -382       gustavo ritter    Work From Home
6109  19-Jun-22   W 26    Atq -348       zaiden wheeler    Work From Home
6299  26-Jun-22   W 27    Atq -348       zaiden wheeler    Work From Home
6300  26-Jun-22   W 27    Atq -350  ALEXANDER DAVENPORT    Work From Home
6301  26-Jun-22   W 27    Atq -459       leslie navarro    Work From Home
6305  26-Jun-22   W 27    Atq -348       zaiden wheeler    Work From Home
6307  26-Jun-22   W 27    Atq -459       leslie navarro    Work From Home
6309  26-Jun-22   W 27    Atq -348       ZAIDEN WHEELER    Work From Home
6310  26-Jun-22   W 27    Atq -350  ALEXANDER DAVENPORT    Work From Home
6311  26-Jun-22   W 27    Atq -459       LESLIE NAVARRO    Work From Home
6312  26-Jun-22   W 27    Atq -348       ZAIDEN WHEELER    Work From Home
6313  26-Jun-22   W 27    Atq -350  ALEXANDER DAVENPORT    Work From Home
6314  26-Jun-22   W 27    Atq -459       LESLIE NAVARRO    Work From Home
In [17]:
df_clean = df.drop_duplicates() #deleted duplicates and stored the clean data in to df_clean

2. Formatting the Dates¶

In [19]:
df_clean.describe()
Out[19]:
date weekno employee_id name status
count 6272 6272 6272 6272 6272
unique 90 14 106 198 7
top 07-May-22 W 22 Atq -419 iris woodard Work From Office
freq 86 551 161 57 3566
In [21]:
df_clean['date'].dtype 
Out[21]:
dtype('O')
In [23]:
df_clean = df_clean.copy()
In [25]:
df_clean['date'] = pd.to_datetime(df_clean['date'])
/var/folders/v_/qq_q29jj67nc57x6ldwhjsqm0000gn/T/ipykernel_3021/4103588557.py:1: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.
  df_clean['date'] = pd.to_datetime(df_clean['date'])
In [29]:
df_clean['date'].dtype
Out[29]:
dtype('<M8[ns]')
In [31]:
print(df_clean.head())
        date weekno employee_id               name            status
0 2022-04-01   W 14   Atq -458@    JESSIE CARRILLO  Work From Office
1 2022-04-01   W 14    Atq -400        chad macias  Work From Office
2 2022-04-01   W 14    Atq -437       dallas weeks  Work From Office
3 2022-04-01   W 14    Atq -369       GRETA HORTON  Work From Office
4 2022-04-01   W 14    Atq -374  KALEY FITZPATRICK  Work From Office
In [33]:
df_clean['month_name'] = df_clean['date'].dt.strftime('%B')
print(df_clean.head())
        date weekno employee_id               name            status  \
0 2022-04-01   W 14   Atq -458@    JESSIE CARRILLO  Work From Office   
1 2022-04-01   W 14    Atq -400        chad macias  Work From Office   
2 2022-04-01   W 14    Atq -437       dallas weeks  Work From Office   
3 2022-04-01   W 14    Atq -369       GRETA HORTON  Work From Office   
4 2022-04-01   W 14    Atq -374  KALEY FITZPATRICK  Work From Office   

  month_name  
0      April  
1      April  
2      April  
3      April  
4      April  
In [35]:
df_clean['day_type'] = df_clean['date'].dt.dayofweek.apply(lambda x: 'Weekend' if x >= 5 else 'Weekday')
print(df_clean.head())
        date weekno employee_id               name            status  \
0 2022-04-01   W 14   Atq -458@    JESSIE CARRILLO  Work From Office   
1 2022-04-01   W 14    Atq -400        chad macias  Work From Office   
2 2022-04-01   W 14    Atq -437       dallas weeks  Work From Office   
3 2022-04-01   W 14    Atq -369       GRETA HORTON  Work From Office   
4 2022-04-01   W 14    Atq -374  KALEY FITZPATRICK  Work From Office   

  month_name day_type  
0      April  Weekday  
1      April  Weekday  
2      April  Weekday  
3      April  Weekday  
4      April  Weekday  

3. Remove extra characters¶

In [37]:
df_clean['employee_id'] = df_clean['employee_id'].str.rstrip('@')
In [39]:
print(df_clean.head(40))
         date weekno employee_id                name            status  \
0  2022-04-01   W 14    Atq -458     JESSIE CARRILLO  Work From Office   
1  2022-04-01   W 14    Atq -400         chad macias  Work From Office   
2  2022-04-01   W 14    Atq -437        dallas weeks  Work From Office   
3  2022-04-01   W 14    Atq -369        GRETA HORTON  Work From Office   
4  2022-04-01   W 14    Atq -374   KALEY FITZPATRICK  Work From Office   
5  2022-04-01   W 14    Atq -470         JOVANI POPE  Work From Office   
6  2022-04-01   W 14    Atq -455      caylee meadows  Work From Office   
7  2022-04-01   W 14    Atq -385    FINNEGAN GILBERT  Work From Office   
8  2022-04-01   W 14    Atq -387       KONNER HENSON  Work From Office   
9  2022-04-01   W 14    Atq -443     andrew cummings  Work From Office   
10 2022-04-01   W 14    Atq -453       NEHEMIAH DIAZ  Work From Office   
11 2022-04-01   W 14    Atq -376  DOMINIQUE BENJAMIN  Work From Office   
12 2022-04-01   W 14    Atq -418        hillary kirk  Work From Office   
13 2022-04-01   W 14    Atq -373         isiah small  Work From Office   
14 2022-04-01   W 14    Atq -418       marquis wyatt  Work From Office   
15 2022-04-01   W 14    Atq -414      TITUS ANDERSEN  Work From Office   
16 2022-04-01   W 14    Atq -403        WILL MAHONEY  Work From Office   
17 2022-04-01   W 14    Atq -393        emma freeman  Work From Office   
18 2022-04-01   W 14    Atq -367          BRILEY ORR  Work From Office   
19 2022-04-01   W 14    Atq -421       rylee mullins  Work From Office   
20 2022-04-01   W 14    Atq -432      RYANN GUERRERO  Work From Office   
21 2022-04-01   W 14    Atq -484     DEMARCUS GORDON  Work From Office   
22 2022-04-01   W 14    Atq -428       CIARA ALLISON  Work From Office   
23 2022-04-01   W 14    Atq -467     ESPERANZA IRWIN  Work From Office   
24 2022-04-01   W 14    Atq -396        KARMA REEVES  Work From Office   
25 2022-04-01   W 14    Atq -472   cristofer bernard  Work From Office   
26 2022-04-01   W 14    Atq -412       TUCKER AUSTIN  Work From Office   
27 2022-04-01   W 14    Atq -398   salvatore hendrix  Work From Office   
28 2022-04-01   W 14    Atq -484        adyson moyer  Work From Office   
29 2022-04-01   W 14    Atq -385         JASE INGRAM  Work From Office   
30 2022-04-01   W 14    Atq -456         adriel pace  Work From Office   
31 2022-04-01   W 14    Atq -375        kenley marsh  Work From Office   
32 2022-04-01   W 14    Atq -475          LONDON KIM  Work From Office   
33 2022-04-01   W 14    Atq -435     dean mclaughlin  Work From Office   
34 2022-04-01   W 14    Atq -348       weston horton  Work From Office   
35 2022-04-01   W 14    Atq -468        xiomara ruiz  Work From Office   
36 2022-04-01   W 14    Atq -368      macie callahan  Work From Office   
37 2022-04-01   W 14    Atq -475           rey novak  Work From Office   
38 2022-04-01   W 14    Atq -375  hailee fitzpatrick  Work From Office   
39 2022-04-01   W 14    Atq -433       JAYCE FUENTES  Work From Office   

   month_name day_type  
0       April  Weekday  
1       April  Weekday  
2       April  Weekday  
3       April  Weekday  
4       April  Weekday  
5       April  Weekday  
6       April  Weekday  
7       April  Weekday  
8       April  Weekday  
9       April  Weekday  
10      April  Weekday  
11      April  Weekday  
12      April  Weekday  
13      April  Weekday  
14      April  Weekday  
15      April  Weekday  
16      April  Weekday  
17      April  Weekday  
18      April  Weekday  
19      April  Weekday  
20      April  Weekday  
21      April  Weekday  
22      April  Weekday  
23      April  Weekday  
24      April  Weekday  
25      April  Weekday  
26      April  Weekday  
27      April  Weekday  
28      April  Weekday  
29      April  Weekday  
30      April  Weekday  
31      April  Weekday  
32      April  Weekday  
33      April  Weekday  
34      April  Weekday  
35      April  Weekday  
36      April  Weekday  
37      April  Weekday  
38      April  Weekday  
39      April  Weekday  
In [105]:
#again checking for duplicates
In [41]:
print(df_clean[df_clean.duplicated(keep=False)]) 
Empty DataFrame
Columns: [date, weekno, employee_id, name, status, month_name, day_type]
Index: []

Capitalise the names¶

In [43]:
df_clean['name'] = df_clean['name'].str.title()
In [45]:
print(df_clean.head())
        date weekno employee_id               name            status  \
0 2022-04-01   W 14    Atq -458    Jessie Carrillo  Work From Office   
1 2022-04-01   W 14    Atq -400        Chad Macias  Work From Office   
2 2022-04-01   W 14    Atq -437       Dallas Weeks  Work From Office   
3 2022-04-01   W 14    Atq -369       Greta Horton  Work From Office   
4 2022-04-01   W 14    Atq -374  Kaley Fitzpatrick  Work From Office   

  month_name day_type  
0      April  Weekday  
1      April  Weekday  
2      April  Weekday  
3      April  Weekday  
4      April  Weekday  
In [47]:
# Function to create alias from status string
def create_alias(status):
    if pd.isna(status):
        return ""
    # Split status by space, take first letter of each word, join together as uppercase
    return ''.join(word[0].upper() for word in status.split())

# Apply function to status column to create new aliases column
df_clean['aliases'] = df_clean['status'].apply(create_alias)

print(df_clean.head())
        date weekno employee_id               name            status  \
0 2022-04-01   W 14    Atq -458    Jessie Carrillo  Work From Office   
1 2022-04-01   W 14    Atq -400        Chad Macias  Work From Office   
2 2022-04-01   W 14    Atq -437       Dallas Weeks  Work From Office   
3 2022-04-01   W 14    Atq -369       Greta Horton  Work From Office   
4 2022-04-01   W 14    Atq -374  Kaley Fitzpatrick  Work From Office   

  month_name day_type aliases  
0      April  Weekday     WFO  
1      April  Weekday     WFO  
2      April  Weekday     WFO  
3      April  Weekday     WFO  
4      April  Weekday     WFO  
In [49]:
df_clean['name'] = df_clean['name'].str.strip()
In [51]:
df['name'] = df['name'].str.replace(r'[^\x00-\x7F]+', '', regex=True)
In [87]:
### Droping Duplicates after cleaning the data
In [71]:
print(df_clean[df_clean.duplicated(keep=False)]) 
           date weekno employee_id                 name            status  \
1006 2022-04-14   W 16    Atq -470          Jovani Pope  Work From Office   
1008 2022-04-14   W 16    Atq -385     Finnegan Gilbert  Work From Office   
1077 2022-04-14   W 16    Atq -470          Jovani Pope  Work From Office   
1078 2022-04-14   W 16    Atq -385     Finnegan Gilbert  Work From Office   
1309 2022-04-18   W 17    Atq -455       Caylee Meadows  Work From Office   
1384 2022-04-18   W 17    Atq -455       Caylee Meadows  Work From Office   
1475 2022-04-20   W 17    Atq -421        Rylee Mullins  Work From Office   
1476 2022-04-20   W 17    Atq -432       Ryann Guerrero  Work From Office   
1478 2022-04-20   W 17    Atq -428        Ciara Allison  Work From Office   
1479 2022-04-20   W 17    Atq -467      Esperanza Irwin  Work From Office   
1480 2022-04-20   W 17    Atq -396         Karma Reeves  Work From Office   
1481 2022-04-20   W 17    Atq -472    Cristofer Bernard  Work From Office   
1482 2022-04-20   W 17    Atq -412        Tucker Austin  Work From Office   
1530 2022-04-20   W 17    Atq -421        Rylee Mullins  Work From Office   
1531 2022-04-20   W 17    Atq -432       Ryann Guerrero  Work From Office   
1533 2022-04-20   W 17    Atq -428        Ciara Allison  Work From Office   
1534 2022-04-20   W 17    Atq -467      Esperanza Irwin  Work From Office   
1535 2022-04-20   W 17    Atq -396         Karma Reeves  Work From Office   
1536 2022-04-20   W 17    Atq -472    Cristofer Bernard  Work From Office   
1537 2022-04-20   W 17    Atq -412        Tucker Austin  Work From Office   
1578 2022-04-21   W 17    Atq -372          Miley Combs  Work From Office   
1579 2022-04-21   W 17    Atq -369         Greta Horton  Work From Office   
1584 2022-04-21   W 17    Atq -387        Konner Henson  Work From Office   
1608 2022-04-21   W 17    Atq -372          Miley Combs  Work From Office   
1609 2022-04-21   W 17    Atq -369         Greta Horton  Work From Office   
1611 2022-04-21   W 17    Atq -387        Konner Henson  Work From Office   
2300 2022-05-02   W 19    Atq -373          Isiah Small  Work From Office   
2370 2022-05-02   W 19    Atq -373          Isiah Small  Work From Office   
3048 2022-05-12   W 20    Atq -461           Josh Sharp  Work From Office   
3052 2022-05-12   W 20    Atq -373          Isiah Small  Work From Office   
3053 2022-05-12   W 20    Atq -418        Marquis Wyatt  Work From Office   
3112 2022-05-12   W 20    Atq -461           Josh Sharp  Work From Office   
3116 2022-05-12   W 20    Atq -373          Isiah Small  Work From Office   
3117 2022-05-12   W 20    Atq -418        Marquis Wyatt  Work From Office   
3362 2022-05-16   W 21    Atq -461           Josh Sharp  Work From Office   
3436 2022-05-16   W 21    Atq -461           Josh Sharp  Work From Office   
3916 2022-05-23   W 22    Atq -411        Samuel Murray  Work From Office   
3995 2022-05-23   W 22    Atq -411        Samuel Murray  Work From Office   
4083 2022-05-25   W 22    Atq -393         Emma Freeman  Work From Office   
4088 2022-05-25   W 22    Atq -467      Esperanza Irwin  Work From Office   
4089 2022-05-25   W 22    Atq -396         Karma Reeves  Work From Office   
4091 2022-05-25   W 22    Atq -412        Tucker Austin  Work From Office   
4092 2022-05-25   W 22    Atq -385          Jase Ingram  Work From Office   
4093 2022-05-25   W 22    Atq -456          Adriel Pace  Work From Office   
4094 2022-05-25   W 22    Atq -475           London Kim  Work From Office   
4138 2022-05-25   W 22    Atq -393         Emma Freeman  Work From Office   
4143 2022-05-25   W 22    Atq -467      Esperanza Irwin  Work From Office   
4144 2022-05-25   W 22    Atq -396         Karma Reeves  Work From Office   
4145 2022-05-25   W 22    Atq -412        Tucker Austin  Work From Office   
4146 2022-05-25   W 22    Atq -385          Jase Ingram  Work From Office   
4147 2022-05-25   W 22    Atq -456          Adriel Pace  Work From Office   
4148 2022-05-25   W 22    Atq -475           London Kim  Work From Office   
4176 2022-05-26   W 22    Atq -389      Julien Hamilton  Work From Office   
4179 2022-05-26   W 22    Atq -426     Isabella Pittman  Work From Office   
4180 2022-05-26   W 22    Atq -362       Kiana Mcmillan  Work From Office   
4182 2022-05-26   W 22    Atq -400          Chad Macias  Work From Office   
4185 2022-05-26   W 22    Atq -374    Kaley Fitzpatrick  Work From Office   
4186 2022-05-26   W 22    Atq -470          Jovani Pope  Work From Office   
4187 2022-05-26   W 22    Atq -387        Konner Henson  Work From Office   
4189 2022-05-26   W 22    Atq -364         Keaton Nixon  Work From Office   
4190 2022-05-26   W 22    Atq -448          Athena Rios  Work From Office   
4216 2022-05-26   W 22    Atq -389      Julien Hamilton  Work From Office   
4219 2022-05-26   W 22    Atq -426     Isabella Pittman  Work From Office   
4220 2022-05-26   W 22    Atq -362       Kiana Mcmillan  Work From Office   
4222 2022-05-26   W 22    Atq -400          Chad Macias  Work From Office   
4224 2022-05-26   W 22    Atq -374    Kaley Fitzpatrick  Work From Office   
4225 2022-05-26   W 22    Atq -470          Jovani Pope  Work From Office   
4226 2022-05-26   W 22    Atq -387        Konner Henson  Work From Office   
4228 2022-05-26   W 22    Atq -364         Keaton Nixon  Work From Office   
4229 2022-05-26   W 22    Atq -448          Athena Rios  Work From Office   
4666 2022-06-01   W 23    Atq -433        Jayce Fuentes  Work From Office   
4671 2022-06-01   W 23    Atq -459         Iris Woodard  Work From Office   
4672 2022-06-01   W 23    Atq -396            Joel Cruz  Work From Office   
4674 2022-06-01   W 23    Atq -446          April Ayers  Work From Office   
4675 2022-06-01   W 23    Atq -334         Boston Morse  Work From Office   
4678 2022-06-01   W 23    Atq -440       Mckayla Parker  Work From Office   
4679 2022-06-01   W 23    Atq -405          Sofia Solis  Work From Office   
4681 2022-06-01   W 23    Atq -467          Cason David  Work From Office   
4682 2022-06-01   W 23    Atq -419       Kaylah Schultz  Work From Office   
4701 2022-06-01   W 23    Atq -433        Jayce Fuentes  Work From Office   
4704 2022-06-01   W 23    Atq -459         Iris Woodard  Work From Office   
4705 2022-06-01   W 23    Atq -396            Joel Cruz  Work From Office   
4707 2022-06-01   W 23    Atq -446          April Ayers  Work From Office   
4708 2022-06-01   W 23    Atq -334         Boston Morse  Work From Office   
4710 2022-06-01   W 23    Atq -440       Mckayla Parker  Work From Office   
4711 2022-06-01   W 23    Atq -405          Sofia Solis  Work From Office   
4713 2022-06-01   W 23    Atq -467          Cason David  Work From Office   
4714 2022-06-01   W 23    Atq -419       Kaylah Schultz  Work From Office   
4727 2022-06-02   W 23    Atq -385         Alyson Huber  Work From Office   
4729 2022-06-02   W 23    Atq -414        Mckayla Miles  Work From Office   
4732 2022-06-02   W 23    Atq -362       Kiana Mcmillan  Work From Office   
4734 2022-06-02   W 23    Atq -455         Aditya Walls  Work From Office   
4735 2022-06-02   W 23    Atq -458      Jessie Carrillo  Work From Office   
4737 2022-06-02   W 23    Atq -372          Miley Combs  Work From Office   
4738 2022-06-02   W 23    Atq -374    Kaley Fitzpatrick  Work From Office   
4740 2022-06-02   W 23    Atq -401        Piper Carroll  Work From Office   
4741 2022-06-02   W 23    Atq -371     Kayleigh Kennedy  Work From Office   
4775 2022-06-02   W 23    Atq -385         Alyson Huber  Work From Office   
4777 2022-06-02   W 23    Atq -414        Mckayla Miles  Work From Office   
4780 2022-06-02   W 23    Atq -362       Kiana Mcmillan  Work From Office   
4782 2022-06-02   W 23    Atq -455         Aditya Walls  Work From Office   
4783 2022-06-02   W 23    Atq -458      Jessie Carrillo  Work From Office   
4785 2022-06-02   W 23    Atq -372          Miley Combs  Work From Office   
4786 2022-06-02   W 23    Atq -374    Kaley Fitzpatrick  Work From Office   
4788 2022-06-02   W 23    Atq -401        Piper Carroll  Work From Office   
4789 2022-06-02   W 23    Atq -371     Kayleigh Kennedy  Work From Office   
5034 2022-06-06   W 24    Atq -472        Quincy Mendez  Work From Office   
5110 2022-06-06   W 24    Atq -472        Quincy Mendez  Work From Office   
5262 2022-06-09   W 24    Atq -376   Dominique Benjamin  Work From Office   
5333 2022-06-09   W 24    Atq -376   Dominique Benjamin  Work From Office   
5571 2022-06-13   W 25    Atq -376   Dominique Benjamin  Work From Office   
5648 2022-06-13   W 25    Atq -376   Dominique Benjamin  Work From Office   
6093 2022-06-19   W 26    Atq -348       Zaiden Wheeler    Work From Home   
6094 2022-06-19   W 26    Atq -374    Kaley Fitzpatrick    Work From Home   
6097 2022-06-19   W 26    Atq -361          Makai Hardy    Work From Home   
6099 2022-06-19   W 26    Atq -398           Bo Cordova    Work From Home   
6101 2022-06-19   W 26    Atq -348       Zaiden Wheeler    Work From Home   
6105 2022-06-19   W 26    Atq -361          Makai Hardy    Work From Home   
6107 2022-06-19   W 26    Atq -398           Bo Cordova    Work From Home   
6110 2022-06-19   W 26    Atq -374    Kaley Fitzpatrick    Work From Home   
6299 2022-06-26   W 27    Atq -348       Zaiden Wheeler    Work From Home   
6300 2022-06-26   W 27    Atq -350  Alexander Davenport    Work From Home   
6301 2022-06-26   W 27    Atq -459       Leslie Navarro    Work From Home   
6302 2022-06-26   W 27    Atq -398           Bo Cordova    Work From Home   
6306 2022-06-26   W 27    Atq -350  Alexander Davenport    Work From Home   
6308 2022-06-26   W 27    Atq -398           Bo Cordova    Work From Home   
6309 2022-06-26   W 27    Atq -348       Zaiden Wheeler    Work From Home   
6311 2022-06-26   W 27    Atq -459       Leslie Navarro    Work From Home   

     month_name day_type aliases  
1006      April  Weekday     WFO  
1008      April  Weekday     WFO  
1077      April  Weekday     WFO  
1078      April  Weekday     WFO  
1309      April  Weekday     WFO  
1384      April  Weekday     WFO  
1475      April  Weekday     WFO  
1476      April  Weekday     WFO  
1478      April  Weekday     WFO  
1479      April  Weekday     WFO  
1480      April  Weekday     WFO  
1481      April  Weekday     WFO  
1482      April  Weekday     WFO  
1530      April  Weekday     WFO  
1531      April  Weekday     WFO  
1533      April  Weekday     WFO  
1534      April  Weekday     WFO  
1535      April  Weekday     WFO  
1536      April  Weekday     WFO  
1537      April  Weekday     WFO  
1578      April  Weekday     WFO  
1579      April  Weekday     WFO  
1584      April  Weekday     WFO  
1608      April  Weekday     WFO  
1609      April  Weekday     WFO  
1611      April  Weekday     WFO  
2300        May  Weekday     WFO  
2370        May  Weekday     WFO  
3048        May  Weekday     WFO  
3052        May  Weekday     WFO  
3053        May  Weekday     WFO  
3112        May  Weekday     WFO  
3116        May  Weekday     WFO  
3117        May  Weekday     WFO  
3362        May  Weekday     WFO  
3436        May  Weekday     WFO  
3916        May  Weekday     WFO  
3995        May  Weekday     WFO  
4083        May  Weekday     WFO  
4088        May  Weekday     WFO  
4089        May  Weekday     WFO  
4091        May  Weekday     WFO  
4092        May  Weekday     WFO  
4093        May  Weekday     WFO  
4094        May  Weekday     WFO  
4138        May  Weekday     WFO  
4143        May  Weekday     WFO  
4144        May  Weekday     WFO  
4145        May  Weekday     WFO  
4146        May  Weekday     WFO  
4147        May  Weekday     WFO  
4148        May  Weekday     WFO  
4176        May  Weekday     WFO  
4179        May  Weekday     WFO  
4180        May  Weekday     WFO  
4182        May  Weekday     WFO  
4185        May  Weekday     WFO  
4186        May  Weekday     WFO  
4187        May  Weekday     WFO  
4189        May  Weekday     WFO  
4190        May  Weekday     WFO  
4216        May  Weekday     WFO  
4219        May  Weekday     WFO  
4220        May  Weekday     WFO  
4222        May  Weekday     WFO  
4224        May  Weekday     WFO  
4225        May  Weekday     WFO  
4226        May  Weekday     WFO  
4228        May  Weekday     WFO  
4229        May  Weekday     WFO  
4666       June  Weekday     WFO  
4671       June  Weekday     WFO  
4672       June  Weekday     WFO  
4674       June  Weekday     WFO  
4675       June  Weekday     WFO  
4678       June  Weekday     WFO  
4679       June  Weekday     WFO  
4681       June  Weekday     WFO  
4682       June  Weekday     WFO  
4701       June  Weekday     WFO  
4704       June  Weekday     WFO  
4705       June  Weekday     WFO  
4707       June  Weekday     WFO  
4708       June  Weekday     WFO  
4710       June  Weekday     WFO  
4711       June  Weekday     WFO  
4713       June  Weekday     WFO  
4714       June  Weekday     WFO  
4727       June  Weekday     WFO  
4729       June  Weekday     WFO  
4732       June  Weekday     WFO  
4734       June  Weekday     WFO  
4735       June  Weekday     WFO  
4737       June  Weekday     WFO  
4738       June  Weekday     WFO  
4740       June  Weekday     WFO  
4741       June  Weekday     WFO  
4775       June  Weekday     WFO  
4777       June  Weekday     WFO  
4780       June  Weekday     WFO  
4782       June  Weekday     WFO  
4783       June  Weekday     WFO  
4785       June  Weekday     WFO  
4786       June  Weekday     WFO  
4788       June  Weekday     WFO  
4789       June  Weekday     WFO  
5034       June  Weekday     WFO  
5110       June  Weekday     WFO  
5262       June  Weekday     WFO  
5333       June  Weekday     WFO  
5571       June  Weekday     WFO  
5648       June  Weekday     WFO  
6093       June  Weekend     WFH  
6094       June  Weekend     WFH  
6097       June  Weekend     WFH  
6099       June  Weekend     WFH  
6101       June  Weekend     WFH  
6105       June  Weekend     WFH  
6107       June  Weekend     WFH  
6110       June  Weekend     WFH  
6299       June  Weekend     WFH  
6300       June  Weekend     WFH  
6301       June  Weekend     WFH  
6302       June  Weekend     WFH  
6306       June  Weekend     WFH  
6308       June  Weekend     WFH  
6309       June  Weekend     WFH  
6311       June  Weekend     WFH  
In [75]:
df_clean = df_clean.drop_duplicates()

Task 2: Ad Hoc aka Analysis¶

In [77]:
# 1. What is the total count of distinct employee names within the dataset?
distinct_name_count = df_clean['name'].nunique()
print("Total distinct employee names:", distinct_name_count)
Total distinct employee names: 99
In [79]:
# 2. Calculate the work-from-home (WFH %) percentage in the month of May.
may_data = df_clean[df_clean['date'].dt.strftime('%m') == '05']

wfh_may = len(may_data[may_data['status'] == "Work From Home"])
print("WFH count in May: ", wfh_may)
WFH count in May:  173
In [81]:
# Count WFH days in May
wfh_days = (may_data['status'] == "Work From Home").sum()

# Count WFO days in May
wfo_days = (may_data['status'] == "Work From Office").sum()

# Total present days = WFH + WFO
total_present_days = wfh_days + wfo_days

print("Total working days in may: ",total_present_days)
Total working days in may:  1506
In [85]:
wfh_percentage1 = (wfh_may / total_present_days) * 100 if total_present_days > 0 else 0
print(f"WFH % in May: {wfh_percentage1:.2f}%")
WFH % in May: 11.49%
In [61]:
# Filter rows where status is WFH or WFO
present_data = may_data[
    (may_data['status'] == "Work From Home") |
    (may_data['status'] == "Work From Office")
]

# Export all present-day rows to CSV
present_data.to_csv("total_present_days.csv", index=False)

print("CSV file saved as total_present_days.csv with all present-day rows")
CSV file saved as total_present_days.csv with all present-day rows
In [ ]: