From 026d6a6a6d039a87665c30fa44b4760cb5220b44 Mon Sep 17 00:00:00 2001 From: Martin Moeller Date: Sun, 2 May 2021 17:43:32 +0200 Subject: [PATCH 1/4] Updated ical2csv.py to match ical2txt.py, including recurring events and HTML removal Handled events that returned date objects instead of datetime objects Added information on how to get the output in the timezone of your choice to the help --- ical2csv.py | 102 +++++++++++++++++++++++++++++++++++++++++----------- ical2txt.py | 18 +++++++--- 2 files changed, 96 insertions(+), 24 deletions(-) diff --git a/ical2csv.py b/ical2csv.py index 464a288..ab24dc8 100755 --- a/ical2csv.py +++ b/ical2csv.py @@ -3,8 +3,24 @@ import sys import os.path from icalendar import Calendar +import recurring_ical_events +from bs4 import BeautifulSoup +import warnings +from dateutil.parser import parse +import datetime import csv +warnings.filterwarnings("ignore", category=UserWarning, module='bs4') # We don't want warnings about URLs. We just want the URL printed, if there. + +if len(sys.argv) <= 1: + print("Please call this script with an ics-file as parameter.\n") + print("Even better, call it with start and end dates:\n") + print(sys.argv[0] + " myexport.ics 20210101 20210201") + print(sys.argv[0] + " myexport.ics 2021-01-01T00:00:00 2021-01-31T23:59:59\n") + print("NOTE: If you need data in another timezone than the system is set to, override like this before running the script:") + print("export TZ=\"Europe/Copenhagen\"\n") + exit(1) + filename = sys.argv[1] # TODO: use regex to get file extension (chars after last period), in case it's not exactly 3 chars. 
file_extension = str(sys.argv[1])[-3:] @@ -25,6 +41,24 @@ def __init__(self, name): events = [] +def removehtml(html): + # Almost word for word copy from here: https://stackoverflow.com/questions/328356/extracting-text-from-html-file-using-python + + soup = BeautifulSoup(html, features="html.parser") + # kill all script and style elements + for script in soup(["script", "style"]): + script.extract() # remove it + + text = soup.get_text() # Get plain text + + # break into lines and remove leading and trailing space on each + lines = (line.strip() for line in text.splitlines()) + # break multi-headlines into a line each + chunks = (phrase.strip() for line in lines for phrase in line.split(" ")) + # drop blank lines + text = '\n'.join(chunk for chunk in chunks if chunk) + + return text def open_cal(): if os.path.isfile(filename): @@ -32,24 +66,33 @@ def open_cal(): print("Extracting events from file:", filename, "\n") f = open(sys.argv[1], 'rb') gcal = Calendar.from_ical(f.read()) + revents = recurring_ical_events.of(gcal).between(istart,istop) - for component in gcal.walk(): +# for component in gcal.walk(): + for component in revents: event = CalendarEvent("event") - if component.get('TRANSP') == 'TRANSPARENT': continue #skip event that have not been accepted - if component.get('SUMMARY') == None: continue #skip blank items - event.summary = component.get('SUMMARY') - event.uid = component.get('UID') - if component.get('DESCRIPTION') == None: continue #skip blank items - event.description = component.get('DESCRIPTION') - event.location = component.get('LOCATION') - if hasattr(component.get('dtstart'), 'dt'): - event.start = component.get('dtstart').dt - if hasattr(component.get('dtend'), 'dt'): - event.end = component.get('dtend').dt - - - event.url = component.get('URL') - events.append(event) + v=(dir(component).count('get')) # Only process data if object is a valid event + if (v != 0): + if component.get('TRANSP') == 'TRANSPARENT': continue #skip all day events and 
the like + if component.get('SUMMARY') == None: continue #skip blank items + event.summary = component.get('SUMMARY') + event.uid = component.get('UID') + if component.get('DESCRIPTION') == None: continue #skip blank items + event.description = component.get('DESCRIPTION') + event.location = component.get('LOCATION') + if hasattr(component.get('dtstart'), 'dt'): + event.start = component.get('dtstart').dt + if hasattr(component.get('dtend'), 'dt'): + event.end = component.get('dtend').dt + + if type(now) != type(event.start): # If we get a datetime.date object, convert to datetime.datetime + event.start=datetime.datetime.combine(event.start, datetime.time.min) + if type(now) != type(event.end): # If we get a datetime.date object, convert to datetime.datetime + event.end=datetime.datetime.combine(event.end, datetime.time.max) + event.start = event.start.astimezone() + event.end = event.end.astimezone() + event.url = component.get('URL') + events.append(event) f.close() else: print("You entered ", filename, ". ") @@ -63,14 +106,21 @@ def open_cal(): def csv_write(icsfile): csvfile = icsfile[:-3] + "csv" + spent=0 + evcount=0 + evskip=0 + sys.stdout.write("Processing events : ") try: with open(csvfile, 'w') as myfile: wr = csv.writer(myfile, quoting=csv.QUOTE_ALL) wr.writerow(headers) for event in sortedevents: - values = (event.summary.encode('utf8').decode(), event.uid, event.description.encode('uft8').decode(), event.location, event.start, event.end, event.url) + values = (event.summary.encode('utf-8').decode(), event.uid, removehtml(event.description.encode('utf-8').decode()), event.location.encode('utf-8').decode(), event.start, event.end, event.url) wr.writerow(values) - print("Wrote to ", csvfile, "\n") + sys.stdout.write(".") + sys.stdout.flush() + evcount+=1 + print("\n\nWrote " + str(evcount) + " events to ", csvfile, "\n") except IOError: print("Could not open file! 
Please close Excel!") exit(0) @@ -86,7 +136,19 @@ def debug_event(class_name): print(class_name.end) print(class_name.url, "\n") -open_cal() -sortedevents=sorted(events, key=lambda obj: obj.start) # Needed to sort events. They are not fully chronological in a Google Calendard export ... +now=datetime.datetime.now() +istart=datetime.datetime.fromtimestamp(0) # Start of UNIX epoch (1970-01-01T00:00:00) +istop=now+datetime.timedelta(seconds=157680000) # Stop 5 years in the future, if no end date is given, to make sure recurring events don't go on forever ... + +if len(sys.argv) > 3: + if sys.argv[2] != '': + istart=parse(sys.argv[2]) + if sys.argv[3] != '': + istop=parse(sys.argv[3]) + +print("Opening ics file\n") +open_cal() # Open ics file and do initial parsing of events +print("Sorting events\n") +sortedevents=sorted(events, key=lambda obj: obj.start) # Make sure events are in chronological order csv_write(filename) #debug_event(event) diff --git a/ical2txt.py b/ical2txt.py index 68b2e0e..a172f91 100755 --- a/ical2txt.py +++ b/ical2txt.py @@ -16,6 +16,8 @@ print("Even better, call it with start and end dates:\n") print(sys.argv[0] + " myexport.ics 20210101 20210201") print(sys.argv[0] + " myexport.ics 2021-01-01T00:00:00 2021-01-31T23:59:59\n") + print("NOTE: If you need data in another timezone than the system is set to, override like this before running the script:") + print("export TZ=\"Europe/Copenhagen\"\n") exit(1) filename = sys.argv[1] @@ -78,10 +80,16 @@ def open_cal(): event.description = component.get('DESCRIPTION') event.location = component.get('LOCATION') if hasattr(component.get('dtstart'), 'dt'): - event.start = component.get('dtstart').dt + event.start = component.get('dtstart').dt.astimezone() if hasattr(component.get('dtend'), 'dt'): - event.end = component.get('dtend').dt - + event.end = component.get('dtend').dt.astimezone() + + if type(now) != type(event.start): # If we get a datetime.date object, convert to datetime.datetime + 
event.start=datetime.datetime.combine(event.start, datetime.time.min) + if type(now) != type(event.end): # If we get a datetime.date object, convert to datetime.datetime + event.end=datetime.datetime.combine(event.end, datetime.time.max) + event.start = event.start.astimezone() + event.end = event.end.astimezone() event.url = component.get('URL') events.append(event) f.close() @@ -124,7 +132,7 @@ def txt_write(icsfile): minutes = divmod(ds,3600)[1]/60 description=removehtml(event.description.encode('utf-8').decode()) values = event.start.strftime("%H:%M") + " - " + event.end.strftime("%H:%M") + " (" + '{:02.0f}'.format(hours) + ":" + '{:02.0f}'.format(minutes) + ") " + event.summary.encode('utf-8').decode() - if event.location != '': values = values + " [" + event.location + "]" # Only include location if there is one + if event.location != '': values = values + " [" + event.location.encode('utf-8').decode() + "]" # Only include location if there is one # Remove Google Meet and Skype Meeting part of description trimmed=description.split('-::~')[0].split('......')[0] @@ -171,7 +179,9 @@ def debug_event(class_name): if sys.argv[3] != '': istop=parse(sys.argv[3]) +print("Opening ics file\n") open_cal() # Open ics file and do initial parsing of events +print("Sorting events\n") sortedevents=sorted(events, key=lambda obj: obj.start) # Make sure events are in chronological order txt_write(filename) # Write the matching events to the textfile. With recurring_ical_events, scoping is already done. #debug_event(event) From 66db983ed908ab0d23a9a3ea38b2e26d8e0d7336 Mon Sep 17 00:00:00 2001 From: Martin Moeller Date: Sun, 2 May 2021 18:04:53 +0200 Subject: [PATCH 2/4] Updated README.md with feature parity --- README.md | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index 123fb41..71d6458 100644 --- a/README.md +++ b/README.md @@ -12,6 +12,8 @@ Download the `ical2csv.py` file. 
* [**Homepage**](http://icalendar.readthedocs.org/) * [**Code**](http://github.com/collective/icalendar) * **Installation**: `pip install icalendar` +* ***BeautifulSoup4*** (pip3 install beautifulsoup4) +* ***recurring-ical-events*** (pip3 install recurring-ical-events) * ***Python* 3** ***Note:*** pip may be called pip3 on some systems with both python2 and python3 as options. @@ -29,14 +31,6 @@ Like ical2csv.py, it parses an ics file and writes the output to a text-file. Th ## Installation of ical2txt Download the script or clone the project and get it from there. -### Dependencies for ical2txt -* ***setuptools*** (just in case : pip3 install setuptools) -* ***BeautifulSoup4*** (pip3 install beautifulsoup4) -* ***icalendar*** (pip3 install icalendar) - * [**Homepage**](http://icalendar.readthedocs.org/) - * [**Code**](http://github.com/collective/icalendar) -* ***recurring-ical-events*** (pip3 install recurring-ical-events) - ## Usage of ical2txt Call the script and pass in the location of the ics file. @@ -50,6 +44,7 @@ Note: You can limit output to a certain time period. Useful for week logs and th `./ical2txt.py myexport.ics 2021-01-01T00:00:00 2021-01-31T23:59:59` **NEW AS OF 2021-03-28:** Recurring events are now actually processed in ical2txt.py. If no end date is given 5 years from now is chosen. +**NEW AS OF 2021-05-02:** ical2csv.py update with same features as ical2txt.py, except removing Google/Skype meeting details. ## Contributing @@ -64,12 +59,15 @@ Note: You can limit output to a certain time period. 
Useful for week logs and th Lead Developer - ical2csv - [Erik Cox](https://github.com/erikcox/) Developer - ical2txt - [Martin Møller](https://github.com/martinm76) +Co-developer - ical2csv - [Martin Møller](https://github.com/martinm76) Python 3 compatibility and improvements - [bozoslivehere](https://github.com/bozoslivehere/) Logic and adjustments to sort events chronologically (Google Calendar doesn't do this in its export) - [Martin Møller](https://github.com/martinm76) -Removal of HTML code from events (currently only ical2txt) - [Martin Møller](https://github.com/martinm76) +Removal of HTML code from events - [Martin Møller](https://github.com/martinm76) +Conversion of date object to datetime objects, to allow sort - [Martin Møller](https://github.com/martinm76) +Timezone fixes [Martin Møller](https://github.com/martinm76) ## License From 1d1493d0c7e326fb9b2706ea3e7ccbff7a96f01c Mon Sep 17 00:00:00 2001 From: Martin Moeller Date: Sun, 2 May 2021 18:08:05 +0200 Subject: [PATCH 3/4] Updated README.md to fix layout bug --- README.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/README.md b/README.md index 71d6458..883694a 100644 --- a/README.md +++ b/README.md @@ -44,6 +44,7 @@ Note: You can limit output to a certain time period. Useful for week logs and th `./ical2txt.py myexport.ics 2021-01-01T00:00:00 2021-01-31T23:59:59` **NEW AS OF 2021-03-28:** Recurring events are now actually processed in ical2txt.py. If no end date is given 5 years from now is chosen. + **NEW AS OF 2021-05-02:** ical2csv.py update with same features as ical2txt.py, except removing Google/Skype meeting details. 
## Contributing @@ -66,7 +67,9 @@ Python 3 compatibility and improvements - [bozoslivehere](https://github.com/boz Logic and adjustments to sort events chronologically (Google Calendar doesn't do this in its export) - [Martin Møller](https://github.com/martinm76) Removal of HTML code from events - [Martin Møller](https://github.com/martinm76) + Conversion of date object to datetime objects, to allow sort - [Martin Møller](https://github.com/martinm76) + Timezone fixes [Martin Møller](https://github.com/martinm76) ## License From e0efa31018f7225ea93fa329f74c0db685b082db Mon Sep 17 00:00:00 2001 From: Martin Moeller Date: Sun, 2 May 2021 18:10:44 +0200 Subject: [PATCH 4/4] Updated README.md to fix layout bug#2 --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 883694a..bdd2c6d 100644 --- a/README.md +++ b/README.md @@ -58,9 +58,9 @@ Note: You can limit output to a certain time period. Useful for week logs and th ## Credits Lead Developer - ical2csv - [Erik Cox](https://github.com/erikcox/) +Co-developer - ical2csv - [Martin Møller](https://github.com/martinm76) Developer - ical2txt - [Martin Møller](https://github.com/martinm76) -Co-developer - ical2csv - [Martin Møller](https://github.com/martinm76) Python 3 compatibility and improvements - [bozoslivehere](https://github.com/bozoslivehere/) @@ -70,7 +70,7 @@ Removal of HTML code from events - [Martin Møller](https://github.com/martinm76 Conversion of date object to datetime objects, to allow sort - [Martin Møller](https://github.com/martinm76) -Timezone fixes [Martin Møller](https://github.com/martinm76) +Timezone fixes - [Martin Møller](https://github.com/martinm76) ## License