diff --git a/assignment2/diary.py b/assignment2/diary.py
new file mode 100644
index 0000000..998b536
--- /dev/null
+++ b/assignment2/diary.py
@@ -0,0 +1,250 @@
+# Task 1: Diary
+import csv
+import traceback
+from datetime import datetime
+
+try:
+    # Append mode keeps the entries written by earlier runs.
+    with open("diary.txt", "a") as file:
+        prompt = "What happened today? "  # Only the first prompt differs
+        while True:
+            entry = input(prompt)
+            # Every entry is recorded, including the closing "done for now".
+            file.write(entry + "\n")
+            if entry.lower() == "done for now":
+                break
+            prompt = "What else? "
+
+except Exception as e:
+    # Handle exceptions and print the traceback
+    trace_back = traceback.extract_tb(e.__traceback__)
+    stack_trace = [
+        f'File : {trace[0]} , Line : {trace[1]}, Func.Name : {trace[2]}, Message : {trace[3]}'
+        for trace in trace_back
+    ]
+    print(f"Exception type: {type(e).__name__}")
+    message = str(e)
+    if message:
+        print(f"Exception message: {message}")
+    print(f"Stack trace: {stack_trace}")
+
+
+# Task 2: Read a CSV File
+def read_employees():
+    """Load ../csv/employees.csv into a dict with 'fields' and 'rows'.
+
+    'fields' is the header row and 'rows' every following row, each a list
+    of strings. If reading fails the error is printed and whatever was not
+    read stays an empty list, so both keys always exist.
+    """
+    employees = {"fields": [], "rows": []}
+    try:
+        # newline='' is what the csv module documents for its input files.
+        with open('../csv/employees.csv', mode='r', newline='') as file:
+            csv_reader = csv.reader(file)
+            employees['fields'] = next(csv_reader)  # First row is the header
+            employees['rows'] = list(csv_reader)
+    except Exception as e:
+        print("An exception occurred: ", e)
+    return employees
+
+
+# Test
+employees = read_employees()
+print(employees)
+
+
+# Task 3: Find the Column Index
+def column_index(field_name):
+    """Return the position of field_name in the employees header row.
+
+    Raises ValueError if the header has no such field.
+    """
+    return employees["fields"].index(field_name)
+
+
+# Test
+employee_id_column = column_index("employee_id")
+print(employee_id_column)
+
+
+# Task 4: Find the Employee First Name
+def first_name(row_num):
+    """Return the first_name value of the employee row at index row_num."""
+    column_idx = column_index("first_name")
+    return employees["rows"][row_num][column_idx]
+
+
+# Test
+print(first_name(0))  # Get first name of the first employee
+
+
+# Task 5: Find the Employee: a Function in a Function
+def employee_find(employee_id):
+    """Return the list of rows whose employee_id equals the given integer."""
+    def employee_match(row):
+        # CSV values are strings, so convert before comparing.
+        return int(row[employee_id_column]) == employee_id
+
+    matches = list(filter(employee_match, employees["rows"]))
+    return matches
+
+
+# Test
+print(employee_find(1001))
+
+
+# Task 6: Find the Employee with a Lambda
+def employee_find_2(employee_id):
+    """Same lookup as employee_find, written with a lambda."""
+    matches = list(filter(lambda row: int(row[employee_id_column]) == employee_id, employees["rows"]))
+    return matches
+
+
+# Test
+print(employee_find_2(1001))
+
+
+# Task 7: Sort the Rows by last_name Using a Lambda
+def sort_by_last_name():
+    """Sort employees["rows"] in place by last_name and return that list."""
+    last_name_column = column_index("last_name")
+    employees["rows"].sort(key=lambda row: row[last_name_column])
+    return employees["rows"]
+
+
+# Test
+sorted_employees = sort_by_last_name()
+print(sorted_employees)
+
+
+# Task 8: Create a dict for an Employee
+def employee_dict(row):
+    """Return a field-name -> value dict for one row, without employee_id."""
+    # zip pairs each header with its value and avoids indexing by position.
+    return {
+        field: value
+        for field, value in zip(employees["fields"], row)
+        if field != "employee_id"
+    }
+
+
+# Test
+print(employee_dict(employees["rows"][0]))  # Test for the first row
+
+
+# Task 9: A dict of dicts, for All Employees
+def all_employees_dict():
+    """Return {employee_id: employee_dict(row)} for every employee row."""
+    return {row[employee_id_column]: employee_dict(row) for row in employees["rows"]}
+
+
+# Test
+print(all_employees_dict())
+
+
+# Task 10: Use the os Module
+# NOTE(review): Task 10 is not implemented here - nothing in this file
+# imports or uses the os module.
+
+
+# Task 11: Creating Your Own Module
+#   1. Create custom_module.py. The definitions below presumably mirror that
+#      file; they are kept so this module still exposes the same names.
+secret = "shazam!"
+
+
+def set_secret(new_secret):
+    """Replace this module's global secret."""
+    global secret
+    secret = new_secret
+
+
+#   2. In your main program:
+import custom_module
+
+
+def set_that_secret(new_secret):
+    """Set the secret held by custom_module (not the copy in this file)."""
+    custom_module.set_secret(new_secret)
+
+
+# Test
+set_that_secret("new_secret_value")
+print(custom_module.secret)
+
+
+# Task 12: Read minutes1.csv and minutes2.csv
+def read_minutes():
+    """Read both minutes CSV files and return them as (minutes1, minutes2).
+
+    Each result is a dict with 'fields' (header row) and 'rows' (a list of
+    tuples, so the rows can later be put in a set).
+    """
+    def read_file(file_name):
+        minutes = {"fields": [], "rows": []}
+        try:
+            with open(file_name, mode='r', newline='') as file:
+                csv_reader = csv.reader(file)
+                minutes["fields"] = next(csv_reader)
+                # Tuples are hashable; lists are not.
+                minutes["rows"].extend(tuple(row) for row in csv_reader)
+        except Exception as e:
+            print("An exception occurred: ", e)
+        return minutes
+
+    minutes1 = read_file("../csv/minutes1.csv")
+    minutes2 = read_file("../csv/minutes2.csv")
+    return minutes1, minutes2
+
+
+# Test
+minutes1, minutes2 = read_minutes()
+print(minutes1)
+print(minutes2)
+
+
+# Task 13: Create minutes_set
+def create_minutes_set():
+    """Return the union of the rows of minutes1 and minutes2 as a set."""
+    return set(minutes1["rows"]) | set(minutes2["rows"])
+
+
+# Test
+minutes_set = create_minutes_set()
+print(minutes_set)
+
+
+# Task 14: Convert to datetime
+def create_minutes_list():
+    """Return minutes_set as a list of (first column, datetime) tuples.
+
+    The second column is parsed with the format "%B %d, %Y".
+    """
+    return [(row[0], datetime.strptime(row[1], "%B %d, %Y")) for row in minutes_set]
+
+
+# Test
+minutes_list = create_minutes_list()
+print(minutes_list)
+
+
+# Task 15: Write Out Sorted List
+def write_sorted_list():
+    """Sort minutes_list by date, write it to ./minutes.csv, and return it.
+
+    The header row comes from minutes1["fields"]; dates are written back in
+    the same "%B %d, %Y" format they were read in.
+    """
+    minutes_list.sort(key=lambda x: x[1])  # Sort by datetime
+    with open("./minutes.csv", mode='w', newline='') as file:
+        csv_writer = csv.writer(file)
+        csv_writer.writerow(minutes1["fields"])
+        for row in minutes_list:
+            csv_writer.writerow([row[0], row[1].strftime("%B %d, %Y")])
+    return minutes_list
+
+
+# Test
+sorted_minutes = write_sorted_list()
+print(sorted_minutes)
diff --git a/assignment2/diary.txt b/assignment2/diary.txt
new file mode 100644
index 0000000..8066252
--- /dev/null
+++ b/assignment2/diary.txt
@@ -0,0 +1,45 @@
+
+
+
+
+
+
+done for now
+
+done for now
+done for now
+done for now
+done for now
+exit
+done for now
+done for now
+done for now
+"done for now"
+
+
+
+
+
+
+
+
+done for now
+
+I went to school
+meet friends
+done for now
+done for now
+done for now
+done for now
+I went to dinner
+meet some friends
+i went to gym
+done for now
+done for now
+done for now
+went to gym
+meet some friends
+went to dinner
+done for now
+done for now
+done for now
diff --git a/assignment3/additional_employees.json b/assignment3/additional_employees.json
new file mode 100644
index 0000000..872c025
--- /dev/null
+++ b/assignment3/additional_employees.json
@@ -0,0 +1,4 @@
+[
+    {"Name": "Eve", "Age": 28, "City": "Miami", "Salary": 60000},
+    {"Name": "Frank", "Age": 40, "City": "Seattle", "Salary": 95000}
+]
\ No newline at end of file
diff --git a/assignment3/assignment3-test.py b/assignment3/assignment3-test.py
index d1ac1af..5843753 100644
--- a/assignment3/assignment3-test.py
+++ b/assignment3/assignment3-test.py
@@ -3,7 +3,7 @@ import pandas as pd
 import os
 
 
-test1_df = pd.DataFrame({ 'Name': ['Alice', 'Bob', 'charlie'],
+test1_df = pd.DataFrame({ 'Name': ['Alice', 'Bob', 'Charlie'],
                    'Age': [25, 30, 35],
                    'City': ['New York', 'Los Angeles', 'Chicago']})
 
@@ -104,9 +104,10 @@ def test_department_uppercase():
             all_upper = False
     assert all_upper
 
-
-
-
+# April 2: If dates are not converted properly with format="mixed" will end up with NaTs
+def test_hire_date_notNAT():
+    nat_count = a3.clean_data['Hire Date'].isna().sum()
+    assert nat_count == 0
 
 
 
diff --git a/assignment3/assignment3.py b/assignment3/assignment3.py
index e69de29..c760515 100644
--- a/assignment3/assignment3.py
+++ b/assignment3/assignment3.py
@@ -0,0 +1,86 @@
+# Task 1: Introduction to Pandas - Creating and Manipulating DataFrames
+import pandas as pd
+
+# 1. Create a DataFrame from a dictionary
+data = {
+    'Name': ['Alice', 'Bob', 'Charlie'],
+    'Age': [25, 30, 35],
+    'City': ['New York', 'Los Angeles', 'Chicago'],
+}
+task1_data_frame = pd.DataFrame(data)
+print(task1_data_frame)
+
+# 2. Add a new column (on a copy, so task1_data_frame stays unchanged)
+task1_with_salary = task1_data_frame.copy()
+task1_with_salary['Salary'] = [70000, 80000, 90000]
+print(task1_with_salary)
+
+# 3. Modify an existing column: everyone is one year older
+task1_older = task1_with_salary.copy()
+task1_older['Age'] = task1_older['Age'] + 1
+print(task1_older)
+
+# 4. Save the DataFrame as a CSV file, without the index column
+task1_older.to_csv('employees.csv', index=False)
+
+# Task 2: Loading Data from CSV and JSON
+# 1. Read data from a CSV file
+task2_employees = pd.read_csv('employees.csv')
+print(task2_employees)
+
+# 2. Read data from a JSON file and append it to the CSV rows
+json_employees = pd.read_json('additional_employees.json')
+print(json_employees)
+more_employees = pd.concat([task2_employees, json_employees], ignore_index=True)
+print(more_employees)
+
+# Task 3: Data Inspection - Using Head, Tail, and Info Methods
+first_three = more_employees.head(3)
+print(first_three)
+
+last_two = more_employees.tail(2)
+print(last_two)
+
+# shape is a (rows, columns) tuple
+employee_shape = more_employees.shape
+print(employee_shape)
+
+# info() prints its summary itself
+more_employees.info()
+
+# Task 4: Data Cleaning
+# Work on a copy so dirty_data keeps the raw file contents.
+dirty_data = pd.read_csv('dirty_data.csv')
+print(dirty_data)
+clean_data = dirty_data.copy()
+
+# Remove any duplicate rows
+clean_data = clean_data.drop_duplicates()
+print(clean_data)
+
+# Convert Age to numeric; values that cannot be parsed become NaN
+clean_data['Age'] = pd.to_numeric(clean_data['Age'], errors='coerce')
+print(clean_data)
+
+# Convert Salary to numeric and replace known placeholders (unknown, n/a) with NaN
+clean_data['Salary'] = clean_data['Salary'].replace(['unknown', 'n/a'], pd.NA)
+clean_data['Salary'] = pd.to_numeric(clean_data['Salary'], errors='coerce')
+print(clean_data)
+
+# Fill missing numeric values: Age with the mean, Salary with the median
+mean_age = clean_data['Age'].mean()
+median_salary = clean_data['Salary'].median()
+clean_data['Age'] = clean_data['Age'].fillna(mean_age)
+clean_data['Salary'] = clean_data['Salary'].fillna(median_salary)
+print(clean_data)
+
+# Convert Hire Date to datetime. format='mixed' parses each value on its own;
+# without it the format is inferred from the first value and every date
+# written differently is coerced to NaT.
+clean_data['Hire Date'] = pd.to_datetime(clean_data['Hire Date'], format='mixed', errors='coerce')
+print(clean_data)
+
+# Strip extra whitespace and standardize Name and Department as uppercase
+clean_data['Name'] = clean_data['Name'].str.strip().str.upper()
+clean_data['Department'] = clean_data['Department'].str.strip().str.upper()
+print(clean_data)
\ No newline at end of file
diff --git a/assignment3/employees.csv b/assignment3/employees.csv
new file mode 100644
index 0000000..2bd2f60
--- /dev/null
+++ b/assignment3/employees.csv
@@ -0,0 +1,4 @@
+Name,Age,City,Salary
+Alice,26,New York,70000
+Bob,31,Los Angeles,80000
+Charlie,36,Chicago,90000