2020-11-02 04:46:00 +01:00
#!/usr/bin/python3
2016-02-04 23:01:46 +01:00
import sys
import os . path
from icalendar import Calendar
2023-02-22 13:25:53 +01:00
import recurring_ical_events
from bs4 import BeautifulSoup
import warnings
from dateutil . parser import parse
import datetime
2016-02-04 23:01:46 +01:00
import csv
2023-02-22 13:25:53 +01:00
# Silence bs4's UserWarnings: we don't want warnings about URLs, we just want
# the URL printed, if there is one.
# The action must be exactly "ignore"; any other spelling is rejected by
# warnings.filterwarnings().
warnings.filterwarnings("ignore", category=UserWarning, module='bs4')
# Without an input file there is nothing to convert: print usage and stop.
if len(sys.argv) <= 1:
    print("Please call this script with an ics-file as parameter.\n")
    print("Even better, call it with start and end dates:\n")
    print(sys.argv[0] + " myexport.ics 20210101 20210201")
    print(sys.argv[0] + " myexport.ics 2021-01-01T00:00:00 2021-01-31T23:59:59\n")
    print("NOTE: If you need data in another timezone than the system is set to, override like this before running the script:")
    print("export TZ=\"Europe/Copenhagen\"\n")
    # sys.exit() instead of the bare exit(): the latter is a helper injected
    # by the site module for interactive use and is missing under `python -S`.
    sys.exit(1)
2016-02-04 23:01:46 +01:00
# Path of the calendar file to convert (first command-line argument).
filename = sys.argv[1]
# Characters after the last period of the file name, without the period.
# os.path.splitext handles extensions of any length, unlike a fixed
# three-character slice.
file_extension = os.path.splitext(filename)[1][1:]
# Column titles written as the first row of the CSV output.
headers = ('Summary', 'UID', 'Description', 'Location', 'Start Time', 'End Time', 'URL')
class CalendarEvent:
    """Plain record holding the fields of one calendar event.

    The class-level attributes are defaults (empty strings); code that
    builds an event overwrites the ones it has data for on the instance.
    """

    summary = ''
    uid = ''
    description = ''
    location = ''
    start = ''
    end = ''
    url = ''

    def __init__(self, name):
        """Create a record labelled *name*; all other fields keep their defaults."""
        self.name = name

    def __repr__(self):
        """Return a short debugging representation with the key fields."""
        return '%s(name=%r, summary=%r, start=%r, end=%r)' % (
            type(self).__name__, self.name, self.summary, self.start, self.end)
2019-10-24 17:49:10 +02:00
# Module-level accumulator: open_cal() appends one CalendarEvent per kept
# calendar entry, and the script later sorts this list by start time.
events = [ ]
2016-02-04 23:01:46 +01:00
2023-02-22 13:25:53 +01:00
def removehtml(html):
    """Return the visible text of *html* as newline-separated plain text.

    Almost word for word copy from here:
    https://stackoverflow.com/questions/328356/extracting-text-from-html-file-using-python

    ``<script>`` and ``<style>`` elements are removed, every line is
    stripped, phrases separated by a double space are put on their own
    line, and blank lines are dropped.
    """
    soup = BeautifulSoup(html, features="html.parser")
    # kill all script and style elements
    for tag in soup(["script", "style"]):
        tag.extract()
    text = soup.get_text()  # Get plain text
    # break into lines and remove leading and trailing space on each
    lines = (line.strip() for line in text.splitlines())
    # break multi-headlines into a line each; the separator is a DOUBLE
    # space -- splitting on a single space would put every word on its own line
    chunks = (phrase.strip() for line in lines for phrase in line.split("  "))
    # drop blank lines
    return '\n'.join(chunk for chunk in chunks if chunk)
2016-02-04 23:01:46 +01:00
def _to_local_datetime(value, fill_time):
    """Return *value* as a timezone-aware datetime in the system's local zone.

    A plain ``datetime.date`` is first combined with *fill_time* to make a
    full datetime.
    """
    # datetime.datetime subclasses datetime.date, so test for the subclass.
    if not isinstance(value, datetime.datetime):
        value = datetime.datetime.combine(value, fill_time)
    return value.astimezone()


def open_cal():
    """Parse the ICS file named on the command line into the global ``events``.

    Reads the module-level ``filename``, ``file_extension``, ``istart`` and
    ``istop``. Entries are expanded with ``recurring_ical_events`` between
    ``istart`` and ``istop``, and one ``CalendarEvent`` per kept entry is
    appended to ``events``.

    An entry is skipped when it is marked ``TRANSP:TRANSPARENT``, when its
    SUMMARY or DESCRIPTION is missing or empty, or when it has no usable
    DTSTART. An entry without DTEND ends at its start value.

    Exits the process with status 1 when ``filename`` does not exist or
    ``file_extension`` is not ``ics``.
    """
    if not os.path.isfile(filename):
        print("I can't find the file", filename, ".")
        print("Please enter an ics file located in the same folder as this script.")
        sys.exit(1)  # failure: report a non-zero status to the shell
    if file_extension != 'ics':
        print("You entered", filename, ".")
        print(file_extension.upper(), "is not a valid file format. Looking for an ICS file.")
        sys.exit(1)

    print("Extracting events from file:", filename)
    # The whole file is read up front, so the handle can be released
    # before the events are processed.
    with open(filename, 'rb') as f:
        gcal = Calendar.from_ical(f.read())
    revents = recurring_ical_events.of(gcal).between(istart, istop)

    for component in revents:
        # Only process data if the object is a valid event (has .get()).
        if not hasattr(component, 'get'):
            continue
        if component.get('TRANSP') == 'TRANSPARENT':
            continue  # skip all day events and the like
        summary = component.get('SUMMARY')
        description = component.get('DESCRIPTION')
        if not summary or not description:
            continue  # skip blank items

        start_prop = component.get('dtstart')
        end_prop = component.get('dtend')
        if not hasattr(start_prop, 'dt'):
            continue  # no start value, so there is nothing to sort or export
        start_value = start_prop.dt
        end_value = end_prop.dt if hasattr(end_prop, 'dt') else start_value

        event = CalendarEvent("event")
        event.summary = summary
        event.uid = component.get('UID')
        event.description = description
        event.location = component.get('LOCATION')
        # Date-only values are widened to cover the whole day.
        event.start = _to_local_datetime(start_value, datetime.time.min)
        event.end = _to_local_datetime(end_value, datetime.time.max)
        event.url = component.get('URL')
        events.append(event)
def csv_write(icsfile):
    """Write the events in the global ``sortedevents`` to a CSV file.

    The output path is *icsfile* with its last three characters replaced by
    ``csv``. The first row is the global ``headers``; every following row is
    one event, with HTML stripped from the description via ``removehtml``
    and a missing location written as an empty string.

    Prints how many events were written. If the file cannot be opened or
    written, prints a hint and exits the process with status 1.
    """
    csvfile = icsfile[:-3] + "csv"
    evcount = 0
    try:
        # newline='' lets the csv module control line endings itself;
        # without it every row is followed by a blank line on Windows.
        with open(csvfile, 'w', newline='') as myfile:
            wr = csv.writer(myfile, quoting=csv.QUOTE_ALL)
            wr.writerow(headers)
            for event in sortedevents:
                location = str(event.location) if event.location is not None else ""
                wr.writerow((
                    str(event.summary),
                    event.uid,
                    removehtml(str(event.description)),
                    location,
                    event.start,
                    event.end,
                    event.url,
                ))
                evcount += 1
        print("Wrote " + str(evcount) + " events to ", csvfile)
    except IOError:
        print("Could not open file! Please close Excel!")
        sys.exit(1)  # failure: report a non-zero status to the shell
2023-02-22 13:25:53 +01:00
now = datetime.datetime.now()
# Default range start: UNIX timestamp 0 (the epoch) as a local-time datetime.
istart = datetime.datetime.fromtimestamp(0)
# Stop 5 years in the future if no end date is given, to make sure
# recurring events don't go on forever ...
istop = now + datetime.timedelta(days=5 * 365)
# Optional range arguments: argv[2] is the start date, argv[3] the end date.
# Each is handled on its own so that a start date given without an end date
# is honoured; an empty string keeps the default.
if len(sys.argv) > 2 and sys.argv[2] != '':
    istart = parse(sys.argv[2])
if len(sys.argv) > 3 and sys.argv[3] != '':
    istop = parse(sys.argv[3])
open_cal()  # Open ics file and do initial parsing of events
sortedevents = sorted(events, key=lambda obj: obj.start)  # Make sure events are in chronological order
csv_write(filename)
2023-02-22 13:42:17 +01:00