"""Import CSV formatted 3-file Turner-like data-sets

These files are Turner-like in that they are 3-file CSVs with cross-file indexing.
"""
from __future__ import print_function
import csv,logging,os,argparse, datetime, json, glob
from atxstyle import utctime, standardlog
from fussy import twrite
import pytz
# Module-level logger, named after this module
log = logging.getLogger(__name__)
# Directory portion of this module's file path
HERE = os.path.dirname(__file__)

def load_stations( channel_file,  codec='latin-1' ):
    """Load station (channel) records from a CSV file

    channel_file -- path of the channels CSV file to read
    codec -- name of the text encoding used by the file

    Rows with fewer than 10 columns (blank lines included) are skipped,
    as a station is built from columns 0-3, 8 and 9; this matches the
    short-row handling of the program and schedule loaders.

    returns dict mapping tmsid (column 0) to a station dict with the
    keys tmsid, name, short_name, language, timezone and location,
    all values being unicode text
    """
    stations = {}
    log.info("Reading stations from: %s", channel_file )
    if str is bytes:
        # Python 2: csv parses byte-strings, fields get decoded below
        fh = open(channel_file, 'rb')
    else:
        # Python 3: csv parses text, so the file itself is decoded
        fh = open(channel_file, encoding=codec, newline='')
    def text( value ):
        """Produce unicode text from a csv field on either Python version"""
        if isinstance(value, bytes):
            return value.decode(codec)
        return value
    with fh:
        for record in csv.reader(fh):
            if len(record) < 10:
                # blank or truncated row, column 9 would be missing
                continue
            station = {
                'tmsid': text(record[0]),
                'name': text(record[2]),
                'short_name': text(record[1]),
                'language': text(record[3]),
                'timezone': text(record[8]),
                'location': text(record[9]),
            }
            stations[station['tmsid']] = station
    return stations
def load_programs( program_file,  codec='latin-1' ):
    """Load program records from a CSV file

    program_file -- path of the programs CSV file to read
    codec -- name of the text encoding used by the file

    Rows with fewer than 11 columns are skipped, as a program is built
    from columns 0, 1 and 10.

    returns dict mapping program_id (column 0) to a program dict with
    the keys program_id, title, show_title, genre and language; genre
    and language are always empty strings here
    """
    programs = {}
    log.info("Reading programs from: %s", program_file )
    if str is bytes:
        # Python 2: csv parses byte-strings, fields get decoded below
        fh = open(program_file, 'rb')
    else:
        # Python 3: csv parses text, so the file itself is decoded
        fh = open(program_file, encoding=codec, newline='')
    def text( value ):
        """Produce unicode text from a csv field on either Python version"""
        if isinstance(value, bytes):
            return value.decode(codec)
        return value
    with fh:
        for record in csv.reader(fh):
            if len(record) < 11:
                continue
            program = {
                'program_id': text(record[0]),
                'title': text(record[1]),
                'show_title': text(record[10]),
                'genre': u'',
                'language':u'',  # included only in the schedules, and as a formatted value...
            }
            programs[program['program_id']] = program
    return programs

def load_schedules( schedule_file,  codec='latin-1'):
    """Iterate over the schedule records in a CSV file

    schedule_file -- path of the schedules CSV file to read
    codec -- name of the text encoding used by the file

    Rows with fewer than 5 columns are skipped.  The columns used are:

        0 -- tmsid of the station
        1 -- program_id of the program
        2 -- start date as MMDDYY digits, the year counted from 2000
        3 -- start time as HHMM digits
        4 -- duration as HHMM digits

    A non-numeric date, time or duration raises ValueError.

    yields lists of [tmsid, program_id, (year, month, day, hour, minute),
    duration-in-seconds]
    """
    log.info("Reading schedules from: %s", schedule_file )
    if str is bytes:
        # Python 2: csv parses byte-strings, fields get decoded below
        fh = open(schedule_file, 'rb')
    else:
        # Python 3: csv parses text, so the file itself is decoded
        fh = open(schedule_file, encoding=codec, newline='')
    def text( value ):
        """Produce unicode text from a csv field on either Python version"""
        if isinstance(value, bytes):
            return value.decode(codec)
        return value
    with fh:
        for record in csv.reader(fh):
            if len(record) < 5:
                continue
            date = record[2]
            month, day, year = [int(d, 10) for d in [date[:2], date[2:4], date[4:6]]]
            year += 2000
            start = record[3]
            hour, minute = [int(d, 10) for d in [start[:2], start[2:]]]
            duration = record[4]
            hours, minutes = [int(d, 10) for d in [duration[:2], duration[2:]]]
            yield [
                text(record[0]),  # tmsid,
                text(record[1]),  # program_id,
                (year, month, day, hour, minute),
                hours*3600 + minutes*60,
            ]

def convert( station_files,  program_files, schedule_files, timezone=None, codec='latin-1' ):
    """Load data-set from stations,programs,schedules given parameters

    station_files, program_files, schedule_files -- iterables of file
        paths, each file being read with the matching load_* function
    timezone -- name passed to pytz.timezone(), the schedule start
        times are interpreted as wall-clock times in that zone; if None
        the zone from utctime.local_zone() is used
    codec -- name of the text encoding, passed to every loader

    Schedules which reference a station or program that was not loaded
    are left out of the result; how many were dropped is logged as a
    warning rather than being silently discarded.

    returns an epgdata format json-compatible structure
    """
    if timezone is None:
        timezone = utctime.local_zone()
    else:
        timezone = pytz.timezone(timezone)
    stations = {}
    programs = {}
    for station_file in station_files:
        stations.update( load_stations( station_file, codec=codec ) )
    for program_file in program_files:
        programs.update(load_programs( program_file, codec=codec ))
    # id -> number of schedules dropped because that id is unknown
    missing_stations = {}
    missing_programs = {}
    schedules = []
    for schedule_file in schedule_files:
        for schedule in load_schedules(schedule_file, codec=codec):
            tmsid, program_id = schedule[0], schedule[1]
            if tmsid not in stations:
                missing_stations[tmsid] = missing_stations.get(tmsid, 0)+1
            elif program_id not in programs:
                missing_programs[program_id] = missing_programs.get(program_id, 0)+1
            else:
                naive = datetime.datetime(*schedule[2])
                # replace the (year, month, day, hour, minute) tuple with the
                # value utctime.as_timestamp gives for the zone-aware start
                schedule[2] = utctime.as_timestamp(timezone.localize(naive))
                schedules.append(schedule)
    if missing_stations:
        log.warning(
            "Dropped %s schedules referencing %s unknown stations: %s",
            sum(missing_stations.values()),
            len(missing_stations),
            sorted(missing_stations),
        )
    if missing_programs:
        log.warning(
            "Dropped %s schedules referencing %s unknown programs: %s",
            sum(missing_programs.values()),
            len(missing_programs),
            sorted(missing_programs),
        )
    return {
        'success':True,
        'schedules': schedules,
        'stations': [
            [r[key] for key in [
                'tmsid', 'name', 'short_name', 'language', 'timezone', 'location'
            ]]
            for r in stations.values()
        ],
        'programs': [
            [r[key] for key in [
                'program_id','title','show_title','genre','language',
            ]]
            for r in programs.values()
        ],
    }

# Default glob patterns, joined onto the source directory by main() to find each input file
CHANNELS_DEFAULT = '*.chn'
PROGRAMS_DEFAULT = '*.prg'
SCHEDULES_DEFAULT = '*.sch'
# Default for the -e/--encoding command-line option
ENCODING_DEFAULT = 'latin1'
# Shown in the -t/--timezone help text, a false value is displayed as 'SYSTEM'
TIMEZONE_DEFAULT = None

def get_options():
    """Build the command-line argument parser for the converter

    The defaults of the glob, encoding and timezone options come from
    the module-level *_DEFAULT constants, so each help text always
    describes the default which is really applied.

    returns the argparse.ArgumentParser, call parse_args() on it to
    get the options
    """
    parser = argparse.ArgumentParser(description='Convert Rovi-style 3-DSV listings into EPG format')
    parser.add_argument('source', metavar='DIRECTORY', help="Directory from which to import (unpacked with the 3 files present)")
    parser.add_argument(
        '-o','--output', metavar="PATH",
        help='Output file to write (default stdout)',
        default=None,
    )
    parser.add_argument(
        '--stations', metavar="PATH",
        help='Output file to write stations subset',
        default=None,
    )
    parser.add_argument(
        '-c','--channels', default=CHANNELS_DEFAULT,
        metavar='GLOB', help="Name of the channels file,  default: %s"%(CHANNELS_DEFAULT, ),
    )
    parser.add_argument(
        '-p','--programs', default=PROGRAMS_DEFAULT,
        metavar='GLOB', help="Name of the programs file,  default: %s"%(PROGRAMS_DEFAULT, ),
    )
    parser.add_argument(
        '-S','--schedules', default=SCHEDULES_DEFAULT,
        metavar='GLOB', help="Name of the schedules file,  default: %s"%(SCHEDULES_DEFAULT, ),
    )
    parser.add_argument(
        # default taken from the constant the help text reports, not a separate literal
        '-t', '--timezone', default=TIMEZONE_DEFAULT,
        metavar='TZNAME', help="Name of the timezone (path from /usr/share/zoneinfo),  default: %s"%(
            TIMEZONE_DEFAULT or 'SYSTEM',
        )
    )
    parser.add_argument(
        '-e', '--encoding',  default=ENCODING_DEFAULT,
        metavar='ENCODING', help="Name of the encoding to use in interpreting files,  default: %s"%(
            ENCODING_DEFAULT,
        ),
    )
    return parser


@standardlog.with_debug( 'epgfetch-convert-rovi' )
def main():
    """Command-line entry point: convert a directory of CSV files to EPG json

    Expands the channels, programs and schedules globs inside the source
    directory, converts the matching files and writes the json result to
    the --output path (or prints it to stdout when no path is given).
    When --stations is given a second json document holding only the
    stations is written to that path.
    """
    options = get_options().parse_args()
    station_files, program_files, schedule_files = [
        glob.glob(os.path.join(options.source, pattern))
        for pattern in [options.channels, options.programs, options.schedules]
    ]
    converted = convert(
        station_files,
        program_files,
        schedule_files,
        timezone = options.timezone,
        # the -e/--encoding option has to reach the loaders to have any effect
        codec = options.encoding,
    )
    linear = json.dumps(converted)
    if options.output:
        twrite.twrite(options.output, linear )
    else:
        print( linear)
    if options.stations:
        stations = {
            'success':True,
            'stations': converted['stations'],
        }
        linear = json.dumps(stations)
        twrite.twrite(options.stations, linear )
    
