Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
199 changes: 199 additions & 0 deletions assignment2/diary.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,199 @@
# Task 1: Diary
import traceback

try:
    # Append mode keeps the entries written by earlier runs.
    with open("diary.txt", "a") as file:
        prompt = "What happened today? "
        while True:
            entry = input(prompt)
            # Every line is recorded, including the closing "done for now".
            file.write(entry + "\n")
            if entry.lower() == "done for now":
                break
            # Only the first prompt is special; every later one asks for more.
            prompt = "What else? "
except Exception as e:
    # Report the exception type, its message (when there is one) and one
    # formatted line per stack frame.
    stack_trace = [
        f'File : {trace[0]} , Line : {trace[1]}, Func.Name : {trace[2]}, Message : {trace[3]}'
        for trace in traceback.extract_tb(e.__traceback__)
    ]
    print(f"Exception type: {type(e).__name__}")
    message = str(e)
    if message:
        print(f"Exception message: {message}")
    print(f"Stack trace: {stack_trace}")



#Task 2: Read a CSV File
import csv
def read_employees(path='../csv/employees.csv'):
    """Load an employee CSV file into a dict of header fields and data rows.

    Args:
        path: Location of the CSV file. Defaults to '../csv/employees.csv'.

    Returns:
        A dict with 'fields' (the header row as a list of strings) and
        'rows' (one list of strings per remaining line). If reading fails,
        the error is printed and whatever was collected so far is returned
        (an empty dict when the file cannot be opened or has no header).
    """
    employees = {}  # To store data
    try:
        # newline='' leaves line-ending handling to the csv module, so line
        # breaks inside quoted fields are preserved instead of translated.
        with open(path, mode='r', newline='') as file:
            csv_reader = csv.reader(file)
            employees['fields'] = next(csv_reader)  # First row holds the field names
            employees['rows'] = list(csv_reader)
    except Exception as e:
        print("An exception occurred: ", e)
    return employees

# Test: load the employee data once at import time. The functions below read
# this module-level dict instead of taking it as an argument.
employees = read_employees()
print(employees)

#Task 3: Find the Column Index
def column_index(field_name):
    """Return the position of field_name within the header row of the global employees dict."""
    header = employees["fields"]
    return header.index(field_name)

# Test: look up the employee_id column position once. employee_find,
# employee_find_2 and all_employees_dict read this module-level value.
employee_id_column = column_index("employee_id")
print(employee_id_column)

#Task 4: Find the Employee First Name
def first_name(row_num):
    """Return the first_name value of the employee row at index row_num."""
    name_column = column_index("first_name")
    selected_row = employees["rows"][row_num]
    return selected_row[name_column]

# Test: print the first_name value of the row at index 0
print(first_name(0))

#Task 5: Find the Employee: a Function in a Function
def employee_find(employee_id):
    """Return a list of every employee row whose employee_id column equals employee_id."""
    def employee_match(row):
        # CSV values are strings, so convert before comparing with the given id.
        return int(row[employee_id_column]) == employee_id

    return [row for row in employees["rows"] if employee_match(row)]

# Test: print the rows whose employee_id equals 1001
print(employee_find(1001))

#Task 6: Find the Employee with a Lambda
def employee_find_2(employee_id):
    """Lambda-based variant of employee_find: return the rows whose id column equals employee_id."""
    rows = employees["rows"]
    return list(filter(lambda row: int(row[employee_id_column]) == employee_id, rows))

# Test: print the rows whose employee_id equals 1001, using the lambda version
print(employee_find_2(1001))

#Task 7: Sort the Rows by last_name Using a Lambda
def sort_by_last_name():
    """Sort the global employee rows in place by last_name and return that same list."""
    key_column = column_index("last_name")
    rows = employees["rows"]
    rows.sort(key=lambda row: row[key_column])
    return rows

# Test: sorted_employees is the same list object as employees["rows"], which
# sort_by_last_name has sorted in place.
sorted_employees = sort_by_last_name()
print(sorted_employees)

#Task 8: Create a dict for an Employee
def employee_dict(row):
    """Map each field name to the matching value of row, leaving out employee_id."""
    field_names = employees["fields"]
    record = {}
    for position, value in enumerate(row):
        field = field_names[position]
        if field != "employee_id":
            record[field] = value
    return record

# Test: build the dict for the row at index 0
print(employee_dict(employees["rows"][0]))

#Task 9: A dict of dicts, for All Employees
def all_employees_dict():
    """Return a dict keyed by each row's employee_id value, holding that row's employee_dict."""
    by_id = {}
    for row in employees["rows"]:
        key = row[employee_id_column]
        by_id[key] = employee_dict(row)
    return by_id

# Test: print the dict of dicts built from every employee row
print(all_employees_dict())

#Task 10: Use the os Module
# NOTE(review): nothing under this heading uses the os module. The code below
# is the custom_module.py content that appears again under Task 11 - confirm
# what was intended here.
# custom_module.py
secret = "shazam!"


def set_secret(new_secret):
    """Rebind the module-level secret to new_secret."""
    global secret
    secret = new_secret

#Task 11: Creating Your Own Module
#1. Create custom_module.py:
# custom_module.py
# Module-level value that set_secret replaces.
secret = "shazam!"


def set_secret(new_secret):
    """Replace the module-level secret with new_secret."""
    global secret
    secret = new_secret
#2. In your main program:
import custom_module

def set_that_secret(new_secret):
    """Forward new_secret to custom_module.set_secret."""
    custom_module.set_secret(new_secret)

# Test: change the secret through the wrapper, then read it back from custom_module
set_that_secret("new_secret_value")
print(custom_module.secret)

#Task 12: Read minutes1.csv and minutes2.csv
def read_minutes(path1="../csv/minutes1.csv", path2="../csv/minutes2.csv"):
    """Read two minutes CSV files and return one dict per file.

    Args:
        path1: Location of the first CSV file. Defaults to "../csv/minutes1.csv".
        path2: Location of the second CSV file. Defaults to "../csv/minutes2.csv".

    Returns:
        A tuple (minutes1, minutes2). Each item is a dict with 'fields' (the
        header row as a list) and 'rows' (a list of tuples, one per data
        line). If a file cannot be read, the error is printed and that dict
        keeps whatever was collected so far (empty lists when opening fails).
    """
    def read_file(file_name):
        # Load one CSV file; rows become tuples so they are hashable.
        minutes = {"fields": [], "rows": []}
        try:
            # newline='' leaves line-ending handling to the csv module, so
            # line breaks inside quoted fields are preserved.
            with open(file_name, mode='r', newline='') as file:
                csv_reader = csv.reader(file)
                minutes["fields"] = next(csv_reader)
                # extend() appends row by row, so rows read before a failure are kept.
                minutes["rows"].extend(tuple(row) for row in csv_reader)
        except Exception as e:
            print("An exception occurred: ", e)
        return minutes

    minutes1 = read_file(path1)
    minutes2 = read_file(path2)
    return minutes1, minutes2

# Test: load both minutes files once. create_minutes_set and
# write_sorted_list read these module-level dicts.
minutes1, minutes2 = read_minutes()
print(minutes1)
print(minutes2)

#Task 13: Create minutes_set
def create_minutes_set():
    """Return the set of distinct rows found in either minutes1 or minutes2."""
    return set(minutes1["rows"]) | set(minutes2["rows"])

# Test: build the combined set once; create_minutes_list reads this global
minutes_set = create_minutes_set()
print(minutes_set)

#Task 14: Convert to datetime
from datetime import datetime

def create_minutes_list():
    """Return a list of (first item, parsed datetime) pairs built from minutes_set.

    The second item of every entry is parsed with the "%B %d, %Y" format.
    """
    return [
        (entry[0], datetime.strptime(entry[1], "%B %d, %Y"))
        for entry in minutes_set
    ]

# Test: build the list once; write_sorted_list sorts this global in place
minutes_list = create_minutes_list()
print(minutes_list)

#Task 15: Write Out Sorted List
def write_sorted_list():
    """Sort minutes_list in place by its datetime item, write it to ./minutes.csv and return it.

    The file starts with the header row of minutes1, and each datetime is
    written back in the "%B %d, %Y" format.
    """
    minutes_list.sort(key=lambda item: item[1])
    with open("./minutes.csv", mode='w', newline='') as file:
        csv_writer = csv.writer(file)
        csv_writer.writerow(minutes1["fields"])
        csv_writer.writerows(
            [item[0], item[1].strftime("%B %d, %Y")] for item in minutes_list
        )
    return minutes_list

# Test: writes ./minutes.csv as a side effect; sorted_minutes is the same list object as minutes_list
sorted_minutes = write_sorted_list()
print(sorted_minutes)
45 changes: 45 additions & 0 deletions assignment2/diary.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@






done for now

done for now
done for now
done for now
done for now
exit
done for now
done for now
done for now
"done for now"








done for now

I went to school
meet friends
done for now
done for now
done for now
done for now
I went to dinner
meet some friends
i went to gym
done for now
done for now
done for now
went to gym
meet some friends
went to dinner
done for now
done for now
done for now
4 changes: 4 additions & 0 deletions assignment3/additional_employees.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
[
{"Name": "Eve", "Age": 28, "City": "Miami", "Salary": 60000},
{"Name": "Frank", "Age": 40, "City": "Seattle", "Salary": 95000}
]
9 changes: 5 additions & 4 deletions assignment3/assignment3-test.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import pandas as pd
import os

test1_df = pd.DataFrame({ 'Name': ['Alice', 'Bob', 'charlie'],
test1_df = pd.DataFrame({ 'Name': ['Alice', 'Bob', 'Charlie'],
'Age': [25, 30, 35],
'City': ['New York', 'Los Angeles', 'Chicago']})

Expand Down Expand Up @@ -104,9 +104,10 @@ def test_department_uppercase():
all_upper = False
assert all_upper




# April 2: if the dates are not converted with format="mixed", some of them end up as NaT
def test_hire_date_notNAT():
    """Check that no 'Hire Date' value in a3.clean_data is missing (NaT)."""
    hire_dates = a3.clean_data['Hire Date']
    assert hire_dates.isna().sum() == 0



Expand Down
76 changes: 76 additions & 0 deletions assignment3/assignment3.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
# Task 1: Introduction to Pandas - Creating and Manipulating DataFrames
#1. Create a DataFrame from a dictionary:
import pandas as pd

# Column data for the starter DataFrame
data = {
    'Name': ['Alice', 'Bob', 'Charlie'],
    'Age': [25, 30, 35],
    'City': ['New York', 'Los Angeles', 'Chicago'],
}
# Convert to DataFrame
task1_data_frame = pd.DataFrame(data)
print(task1_data_frame)
#2. Add a new column (assign returns a new DataFrame, so the original stays unchanged):
task1_with_salary = task1_data_frame.assign(Salary=[70000, 80000, 90000])
print(task1_with_salary)
#3. Modify an existing column (again on a new DataFrame):
task1_older = task1_with_salary.assign(Age=task1_with_salary['Age'] + 1)
print(task1_older)
#4. Save the DataFrame as a CSV file, without the index column:
task1_older.to_csv('employees.csv', index=False)

#Task 2: Loading Data from CSV and JSON
#1. Read data from a CSV file (employees.csv is the file written at the end of Task 1):
task2_employees = pd.read_csv('employees.csv')
print(task2_employees)
#2. Read data from a JSON file (path is relative to the current working directory):
json_employees = pd.read_json('additional_employees.json')
print(json_employees)
#3. Stack the CSV rows and the JSON rows; ignore_index=True renumbers the combined rows from 0.
more_employees = pd.concat([task2_employees, json_employees], ignore_index=True)
print(more_employees)

#Task 3: Data Inspection - Using Head, Tail, and Info Methods
# Using the head() method: keep the first three rows
first_three = more_employees.head(3)
print(first_three)
# Using the tail() method: keep the last two rows
last_two = more_employees.tail(2)
print(last_two)
# Get the shape of the DataFrame as a (rows, columns) tuple
employee_shape = more_employees.shape
print(employee_shape)
# Use the info() method; it prints its summary itself, so no print() call is needed
more_employees.info()

# Task 4: Data Cleaning
# Create a DataFrame from dirty_data.csv file and assign it to the variable dirty_data.
# dirty_data itself is left untouched; all cleaning happens on the copy clean_data.
dirty_data = pd.read_csv('dirty_data.csv')
print(dirty_data)
clean_data = dirty_data.copy()
# Remove any duplicate rows from the DataFrame
clean_data = clean_data.drop_duplicates()
print(clean_data)
# Convert Age to numeric; values that cannot be parsed become NaN
clean_data['Age'] = pd.to_numeric(clean_data['Age'], errors='coerce')
print(clean_data)
# Convert Salary to numeric and replace known placeholders (unknown, n/a) with NaN
clean_data['Salary'] = clean_data['Salary'].replace(['unknown', 'n/a'], pd.NA)
clean_data['Salary'] = pd.to_numeric(clean_data['Salary'], errors='coerce')
print(clean_data)
# Fill missing numeric values (use fillna). Fill Age with the mean and Salary with the median
mean_age = clean_data['Age'].mean()
median_salary = clean_data['Salary'].median()

clean_data['Age'] = clean_data['Age'].fillna(mean_age)
clean_data['Salary'] = clean_data['Salary'].fillna(median_salary)
print(clean_data)
# Convert Hire Date to datetime. format='mixed' makes pandas infer the format
# of each value individually. Without it the format is inferred from the
# first value, and with errors='coerce' every date written in a different
# format silently becomes NaT.
clean_data['Hire Date'] = pd.to_datetime(clean_data['Hire Date'], format='mixed', errors='coerce')
print(clean_data)
# Strip extra whitespace and standardize Name and Department as uppercase
clean_data['Name'] = clean_data['Name'].str.strip().str.upper()
clean_data['Department'] = clean_data['Department'].str.strip().str.upper()
print(clean_data)
4 changes: 4 additions & 0 deletions assignment3/employees.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
Name,Age,City,Salary
Alice,26,New York,70000
Bob,31,Los Angeles,80000
Charlie,36,Chicago,90000