lamhich · lamhich · Mar 21, 2025 · Mar 21, 2025 · Mar 21, 2025 · Mar 21, 2025
diff --git a/assignment2/diary.py b/assignment2/diary.py
@@ -0,0 +1,262 @@
+# Assignment 2: diary prompt loop (Task 1), employee CSV helpers (Tasks 2-9),
+# custom module exercise (Tasks 10-11) and meeting-minutes pipeline (Tasks 12-15).
+import csv
+import traceback
+from datetime import datetime
+
+import custom_module
+
+# Task 1: Diary
+try:
+    with open("diary.txt", "a") as file:  # Append so earlier entries are kept
+        prompt = "What happened today? "  # Only the first prompt uses this text
+
+        while True:
+            entry = input(prompt)
+            prompt = "What else? "
+
+            # The closing phrase is recorded as well, so write before testing it.
+            file.write(entry + "\n")
+
+            # Case-insensitive, and tolerant of stray surrounding whitespace.
+            if entry.strip().lower() == "done for now":
+                break
+
+except Exception as e:
+    # Report the exception type, its message (if any) and every stack frame.
+    stack_trace = [
+        f'File : {trace[0]} , Line : {trace[1]}, Func.Name : {trace[2]}, Message : {trace[3]}'
+        for trace in traceback.extract_tb(e.__traceback__)
+    ]
+    print(f"Exception type: {type(e).__name__}")
+    message = str(e)
+    if message:
+        print(f"Exception message: {message}")
+    print(f"Stack trace: {stack_trace}")
+
+
+# Task 2: Read a CSV File
+def read_employees():
+    """Load ../csv/employees.csv into a dict.
+
+    Returns:
+        A dict with "fields" (the header row) and "rows" (a list holding
+        every following row as a list of strings). If reading fails, the
+        error is printed and whatever was stored before the failure is
+        returned, so either key may be missing.
+    """
+    result = {}
+    try:
+        # The csv module asks for newline='' so it can handle line endings
+        # (including newlines inside quoted fields) itself.
+        with open('../csv/employees.csv', mode='r', newline='') as file:
+            csv_reader = csv.reader(file)
+            result['fields'] = next(csv_reader)  # First row holds the field names
+            result['rows'] = list(csv_reader)
+    except Exception as e:
+        print("An exception occurred: ", e)
+    return result
+
+
+# Test
+employees = read_employees()
+print(employees)
+
+
+# Task 3: Find the Column Index
+def column_index(field_name):
+    """Return the zero-based position of field_name in employees["fields"].
+
+    Raises:
+        ValueError: if field_name is not one of the fields.
+    """
+    return employees["fields"].index(field_name)
+
+
+# Test
+employee_id_column = column_index("employee_id")
+print(employee_id_column)
+
+
+# Task 4: Find the Employee First Name
+def first_name(row_num):
+    """Return the first_name value of the row at index row_num."""
+    return employees["rows"][row_num][column_index("first_name")]
+
+
+# Test
+print(first_name(0))  # Get first name of the first employee
+
+
+# Task 5: Find the Employee: a Function in a Function
+def employee_find(employee_id):
+    """Return the rows whose employee_id cell, converted with int(), equals employee_id.
+
+    The column position comes from the module-level employee_id_column.
+    """
+    def employee_match(row):
+        return int(row[employee_id_column]) == employee_id
+
+    return list(filter(employee_match, employees["rows"]))
+
+
+# Test
+print(employee_find(1001))
+
+
+# Task 6: Find the Employee with a Lambda
+def employee_find_2(employee_id):
+    """Same lookup as employee_find, written with a lambda."""
+    return list(filter(lambda row: int(row[employee_id_column]) == employee_id, employees["rows"]))
+
+
+# Test
+print(employee_find_2(1001))
+
+
+# Task 7: Sort the Rows by last_name Using a Lambda
+def sort_by_last_name():
+    """Sort employees["rows"] in place by the last_name column and return that list."""
+    last_name_column = column_index("last_name")
+    employees["rows"].sort(key=lambda row: row[last_name_column])
+    return employees["rows"]
+
+
+# Test
+sorted_employees = sort_by_last_name()
+print(sorted_employees)
+
+
+# Task 8: Create a dict for an Employee
+def employee_dict(row):
+    """Return {field name: value} for row, leaving out the employee_id field.
+
+    Fields and values are paired by position; pairing stops at the shorter
+    of the two sequences.
+    """
+    return {
+        field: value
+        for field, value in zip(employees["fields"], row)
+        if field != "employee_id"
+    }
+
+
+# Test
+print(employee_dict(employees["rows"][0]))  # Test for the first row
+
+
+# Task 9: A dict of dicts, for All Employees
+def all_employees_dict():
+    """Return a dict mapping each row's employee_id cell to employee_dict(row)."""
+    return {row[employee_id_column]: employee_dict(row) for row in employees["rows"]}
+
+
+# Test
+print(all_employees_dict())
+
+
+# Task 10: Use the os Module
+# NOTE(review): no code under this heading uses the os module -- confirm
+# whether Task 10 is still outstanding.
+
+# Task 11: Creating Your Own Module
+# 1. Contents intended for custom_module.py. This in-file copy is separate
+#    from the globals of the imported custom_module.
+secret = "shazam!"
+
+
+def set_secret(new_secret):
+    """Rebind this module's global secret to new_secret."""
+    global secret
+    secret = new_secret
+
+
+# 2. In the main program, go through the imported module.
+def set_that_secret(new_secret):
+    """Pass new_secret on to custom_module.set_secret."""
+    custom_module.set_secret(new_secret)
+
+
+# Test
+set_that_secret("new_secret_value")
+print(custom_module.secret)
+
+
+# Task 12: Read minutes1.csv and minutes2.csv
+def read_minutes():
+    """Read ../csv/minutes1.csv and ../csv/minutes2.csv.
+
+    Returns:
+        A (minutes1, minutes2) tuple. Each item is a dict with "fields" (the
+        header row) and "rows" (a list of row tuples). When a file cannot be
+        read, the error is printed and that item keeps whatever was
+        collected before the failure.
+    """
+    def read_file(file_name):
+        minutes = {"fields": [], "rows": []}
+        try:
+            with open(file_name, mode='r', newline='') as file:
+                csv_reader = csv.reader(file)
+                minutes["fields"] = next(csv_reader)
+                for row in csv_reader:
+                    # Tuples are hashable, which lets Task 13 put rows in a set.
+                    minutes["rows"].append(tuple(row))
+        except Exception as e:
+            print("An exception occurred: ", e)
+        return minutes
+
+    return read_file("../csv/minutes1.csv"), read_file("../csv/minutes2.csv")
+
+
+# Test
+minutes1, minutes2 = read_minutes()
+print(minutes1)
+print(minutes2)
+
+
+# Task 13: Create minutes_set
+def create_minutes_set():
+    """Return the set union of the row tuples in minutes1 and minutes2."""
+    return set(minutes1["rows"]) | set(minutes2["rows"])
+
+
+# Test
+minutes_set = create_minutes_set()
+print(minutes_set)
+
+
+# Task 14: Convert to datetime
+def create_minutes_list():
+    """Return minutes_set as a list of (first cell, datetime) tuples.
+
+    The second cell of each row is parsed with the "%B %d, %Y" format.
+    """
+    return list(map(lambda x: (x[0], datetime.strptime(x[1], "%B %d, %Y")), minutes_set))
+
+
+# Test
+minutes_list = create_minutes_list()
+print(minutes_list)
+
+
+# Task 15: Write Out Sorted List
+def write_sorted_list():
+    """Sort minutes_list by its datetime element and write it to ./minutes.csv.
+
+    The header row is minutes1["fields"] and each date is written in the
+    "%B %d, %Y" format. Returns minutes_list (sorted in place), whose dates
+    are still datetime objects.
+    """
+    minutes_list.sort(key=lambda x: x[1])  # Sort by datetime
+    with open("./minutes.csv", mode='w', newline='') as file:
+        csv_writer = csv.writer(file)
+        csv_writer.writerow(minutes1["fields"])
+        csv_writer.writerows(
+            [label, when.strftime("%B %d, %Y")] for label, when in minutes_list
+        )
+    return minutes_list
+
+
+# Test
+sorted_minutes = write_sorted_list()
+print(sorted_minutes)
diff --git a/assignment2/diary.txt b/assignment2/diary.txt
@@ -0,0 +1,45 @@
+
+
+
+
+
+
+done for now 
+
+done for now 
+done for now 
+done for now 
+done for now 
+exit 
+done for now 
+done for now 
+done for now 
+"done for now"
+
+
+
+
+
+
+
+
+done for now 
+
+I went to school 
+meet friends 
+done for now 
+done for now 
+done for now 
+done for now 
+I went to dinner 
+meet some friends 
+i went to gym 
+done for now 
+done for now 
+done  for now 
+went to gym 
+meet some friends 
+went to dinner 
+done for now 
+done for now 
+done for now 
diff --git a/assignment3/additional_employees.json b/assignment3/additional_employees.json
@@ -0,0 +1,4 @@
+[
+    {"Name": "Eve", "Age": 28, "City": "Miami", "Salary": 60000},
+    {"Name": "Frank", "Age": 40, "City": "Seattle", "Salary": 95000}
+]
diff --git a/assignment3/assignment3-test.py b/assignment3/assignment3-test.py
@@ -3,7 +3,7 @@
 import pandas as pd
 import os
 
-test1_df = pd.DataFrame({   'Name': ['Alice', 'Bob', 'charlie'], 
+test1_df = pd.DataFrame({   'Name': ['Alice', 'Bob', 'Charlie'], 
                             'Age': [25, 30, 35], 
                             'City': ['New York', 'Los Angeles', 'Chicago']})
 
@@ -104,9 +104,10 @@ def test_department_uppercase():
             all_upper = False
     assert all_upper
 
-
-
-
+# April 2: If dates are not converted properly with format="mixed", the column will end up with NaTs
+def test_hire_date_notNAT():
+    """Fail if any 'Hire Date' value in the cleaned data is missing (NaT)."""
+    assert a3.clean_data['Hire Date'].isna().sum() == 0
 
 
 

diff --git a/assignment3/assignment3.py b/assignment3/assignment3.py
@@ -0,0 +1,90 @@
+# Task 1: Introduction to Pandas - Creating and Manipulating DataFrames
+import pandas as pd
+
+# 1. Create a DataFrame from a dictionary
+data = {
+    'Name': ['Alice', 'Bob', 'Charlie'],
+    'Age': [25, 30, 35],
+    'City': ['New York', 'Los Angeles', 'Chicago'],
+}
+task1_data_frame = pd.DataFrame(data)
+print(task1_data_frame)
+
+# 2. Add a new column (on a copy, so task1_data_frame stays unchanged)
+task1_with_salary = task1_data_frame.copy()
+task1_with_salary['Salary'] = [70000, 80000, 90000]
+print(task1_with_salary)
+
+# 3. Modify an existing column: everyone is one year older
+task1_older = task1_with_salary.copy()
+task1_older['Age'] = task1_older['Age'] + 1
+print(task1_older)
+
+# 4. Save the DataFrame as a CSV file, without the index column
+task1_older.to_csv('employees.csv', index=False)
+
+# Task 2: Loading Data from CSV and JSON
+# 1. Read data from a CSV file
+task2_employees = pd.read_csv('employees.csv')
+print(task2_employees)
+
+# 2. Read data from a JSON file
+json_employees = pd.read_json('additional_employees.json')
+print(json_employees)
+
+# 3. Combine both sources; ignore_index renumbers the rows from 0
+more_employees = pd.concat([task2_employees, json_employees], ignore_index=True)
+print(more_employees)
+
+# Task 3: Data Inspection - Using Head, Tail, and Info Methods
+# First three rows
+first_three = more_employees.head(3)
+print(first_three)
+
+# Last two rows
+last_two = more_employees.tail(2)
+print(last_two)
+
+# (rows, columns) of the DataFrame
+employee_shape = more_employees.shape
+print(employee_shape)
+
+# info() prints its summary itself
+more_employees.info()
+
+# Task 4: Data Cleaning
+# Load dirty_data.csv and work on a copy so dirty_data keeps the raw values
+dirty_data = pd.read_csv('dirty_data.csv')
+print(dirty_data)
+clean_data = dirty_data.copy()
+
+# Remove any duplicate rows
+clean_data = clean_data.drop_duplicates()
+print(clean_data)
+
+# Convert Age to numeric; values that cannot be parsed become NaN
+clean_data['Age'] = pd.to_numeric(clean_data['Age'], errors='coerce')
+print(clean_data)
+
+# Convert Salary to numeric, treating the placeholders 'unknown' and 'n/a' as missing
+clean_data['Salary'] = clean_data['Salary'].replace(['unknown', 'n/a'], pd.NA)
+clean_data['Salary'] = pd.to_numeric(clean_data['Salary'], errors='coerce')
+print(clean_data)
+
+# Fill missing numeric values: Age with the mean, Salary with the median
+mean_age = clean_data['Age'].mean()
+median_salary = clean_data['Salary'].median()
+clean_data['Age'] = clean_data['Age'].fillna(mean_age)
+clean_data['Salary'] = clean_data['Salary'].fillna(median_salary)
+print(clean_data)
+
+# Convert Hire Date to datetime. format='mixed' parses each value on its own;
+# without it pandas 2.x applies one inferred format to the whole column and,
+# with errors='coerce', turns every value written differently into NaT.
+clean_data['Hire Date'] = pd.to_datetime(clean_data['Hire Date'], format='mixed', errors='coerce')
+print(clean_data)
+
+# Strip extra whitespace and standardize Name and Department as uppercase
+clean_data['Name'] = clean_data['Name'].str.strip().str.upper()
+clean_data['Department'] = clean_data['Department'].str.strip().str.upper()
+print(clean_data)
diff --git a/assignment3/employees.csv b/assignment3/employees.csv
@@ -0,0 +1,4 @@
+Name,Age,City,Salary
+Alice,26,New York,70000
+Bob,31,Los Angeles,80000
+Charlie,36,Chicago,90000