# -*- coding: utf-8 -*-
"""
Overview:
    This program ( graph_views.py, see VERSION for the version ) graphs data from the scrape_views.py log files
    Use in conjunction with scrape_views.py
    Author:  russ_hensel, see:  http://www.opencircuits.com/User:Russ_hensel
    Download: on Instructables search for: Graph Instructable Views with Python Screen Scraping

Environment:
    OS:        Win 7  ( should work on other OSs with Python)
    IDE:       Spyder 2.3.1
    Language:  Python 2.7

Reminders, notes:
    logs to console and file, watch the output there
    fair number of commented out debugging statements
    will throw Python console errors on bad data

status/history

    working, but see enhancement list

    enhancements ! = in process, * = done

        !add other programs to analyze the data
            *organize constants better at beginning of the program
            *save graphics automatically parameter controlled -- in graph line
            *coordinate programs now graph_views.py scrape_views.py
            *implement constants
        !implement line count for error reporting
            *fix legends
            *get graph size up -- control size
            *warnings if data is off graph
            *use logit for messages
        consider listing long and short names of the plots above or below them
        !spiff up the graphs some more
        !do we want errors or other stuff in a log file?
            *prints should probably mostly become logit()
            *show data points -- see code
        smooth data on slope ( daily views )? will need 10 of data points
        !time scale still needs some work
        web page not found is wrong, fix
        error in parse plot parms is just ignored more or less
        probably should throw my own exceptions on errors and exit cleanly
            *graph slope as percent of some value
        what if no graph of right type, end of a graphing set?
        !fix up name conventions -- may ignore pep8 if I do not like
            *use a circular list for plot styles rather than python default
        add multipoint smoothing for daily views

"""

# if you are missing any of these use pip, conda .... to install
import sys                          # system functions
import matplotlib.pyplot as plt     # plotting stuff
import os

# ---- program identity
PROG_NAME = "graph_views.py"
VERSION = "2015 Feb 12.2"               # version of program

# ---- graph/plot types supported; matched against the first field of a
#      line in the url file ( see main() )
PLOT_TOT = "graph_tot"
PLOT_SLOPE = "graph_slope"
PLOT_SLOPE_PCT = "graph_slope_pct"
PLOT_END = "graph_end"                  # main() shows the plot in progress when it meets this

# ---- files
COUNT_EXT = ".txt"                      # extension added to a data file name
URL_FILE = "urllist.txt"                # name of file where urls are kept
LOG_FILE = "views.log"                  # name of the logging file

# ---- limits and conversions
LIMIT = 666                             # max files to graph, then stop processing; mostly for testing
CNV_TODAYS = 1.0 / ( 24 * 60 * 60 )     # factor to convert ticks ( seconds ) to days

# ---- globals   ... not usually good form but
plot_type = PLOT_TOT                    # set again in get_args()
tplot = None                            # the ViewPlot in progress, created in main()
data_dir = ""                           # directory joined in front of every file name; "" leaves the bare name


class LineStyle():
    """
    Hands out, one call at a time, a ( line, color, marker ) tuple so that
    successive graph lines are easy to tell apart; use instead of the
    matplotlib defaults.

    Each of the three lists is stepped through independently and wraps
    back to its first entry when its end is reached.
    use:
        self.current_style  = self.lineStyle.getStyle()  # ( line, color, marker )
        self.axes.plot(  x, y, label= alabel, linestyle = self.current_style[0], marker = self.current_style[2],   color = self.current_style[1] )
    """

    def __init__(self ):
        # line styles and the position of the next one to hand out
        self.lines       = [  '-', '--', ':' ]
        self.line_ix     = 0
        self.max_line    = len( self.lines  )

        # want dark colors, yellow and orange are light
        self.colors      = [ 'red', 'blue', 'cyan', 'green', 'black' ]
        self.color_ix    = 0
        self.max_color   = len( self.colors  )

        # point markers
        self.markers     = [ 'o', 'x', '+', '*', 'h', 's' ]
        self.marker_ix   = 0
        self.max_marker  = len( self.markers  )

        #   https://stackoverflow.com/questions/8409095/matplotlib-set-markers-for-individual-points-on-a-line

    def getLine( self, ):
        """
        inside class use only,
        return the current line style and step to the next one
        """

        picked        = self.lines[ self.line_ix ]
        following     = self.line_ix + 1
        self.line_ix  = 0 if following >= self.max_line else following
        return picked

    def getColor( self, ):
        """
        inside class use only,
        return the current color and step to the next one
        """

        picked         = self.colors[ self.color_ix ]
        following      = self.color_ix + 1
        self.color_ix  = 0 if following >= self.max_color else following
        return picked

    def getMarker( self, ):
        """
        inside class use only,
        return the current marker and step to the next one
        """

        picked          = self.markers[ self.marker_ix ]
        following       = self.marker_ix + 1
        self.marker_ix  = 0 if following >= self.max_marker else following
        return picked

    def getStyle( self,  ):
        """
        return the next tuple: ( line, color, marker )
        """

        line_style  = self.getLine()
        color       = self.getColor()
        marker      = self.getMarker()
        return ( line_style, color, marker )


class ViewPlot:
    """
    Builds, optionally saves, and displays one matplotlib graph.

    A class is used so an instance can easily retain values between calls
    to its functions.  Sequence of use, as in main():
        parse_plot_pams( graph_line )   # settings from the graph line
        start_plot()                    # create figure and axes
        add_line( x, y, label )         # once per data file
        show_plot()                     # save ( if asked for ) and display

    lines_ix tracks progress: -1 before any setup, 0 once parameters are
    parsed or the plot is started, then +1 for every add_line() call.
    """

    def __init__( self ):
        """
        constructor, just sets the defaults
        """

        self.set_default_values()

        return


    def set_default_values( self ):
        """
        set the defaults used until parse_plot_pams() overrides them
        return nothing
        """

        self.lineStyle      = LineStyle()
        self.current_style  = None         # assigned in add_line() from self.lineStyle.getStyle()
        self.plot_max_y     = 250000.
        self.plot_min_y     = 0.

        self.fig            = None
        self.axes           = None     # may not need to be instance

        self.lines_ix       = -1 # track no of lines -1 is pre init
        self.save_plots     = False

        # next two to warn about out of range values only one warn on each plot?
        # may not need these, not read anywhere in this class
        self.warn_to_high   = False
        self.warn_to_low    = False

        self.plot_title  = "Instructable View Counts"
        self.save_graph  = "NO"       # plot is saved when this ends in .png

        return


    def parse_plot_pams( self, agraph_line ):
        """
        analyze the parameters on a url list graph line ( line begins with graph_ )
        second step of setting up ViewPlot object, with parms
        agraph_line is the line from the url file, like:
            graph_tot,  title = joe, max_y = 100000, min_y = 300000
        recognized names: max_y, min_y, title, save_graph
        a parameter without an = sign is logged and stops the parse; an
        unknown name or a non numeric max_y / min_y is logged and skipped
        reads the globals data_dir and url_file_line_ix ( set in main() )
        return nothing
        """

        graph_split  = agraph_line.strip().split( "," )

        for asplit in graph_split[1:]:   # skip first which is graph_ ...
            # split on the first = only, so a value may itself contain =
            parm_split   = asplit.split( "=", 1 )
            if len( parm_split ) != 2:
                logit(  "Error: in parse_plot_pams parm_split does not have len 2 in line = " + str( url_file_line_ix ) )
                return

            parm_name    = parm_split[0].strip()
            parm_value   = parm_split[1].strip()

            if parm_name in ( "max_y", "min_y" ):
                try:
                    y_limit  = float( parm_value )
                except ValueError:
                    # bad number: report it and keep the value already set
                    logit( "Error: parm error in file " + parm_name + "  " + parm_value + " at line = " + str( url_file_line_ix ) )
                else:
                    if parm_name == "max_y":
                        self.plot_max_y  = y_limit
                    else:
                        self.plot_min_y  = y_limit

            elif parm_name == "title":
                self.plot_title  =  parm_value

            elif parm_name == "save_graph":
                self.save_graph  =  os.path.join(  data_dir, parm_value   )

            else:
                logit( "Error: parm error in file " + parm_name + "  " + parm_value + " at line = " + str( url_file_line_ix ) )

            self.lines_ix    = 0     # track no of lines 0 is parms set, may allow another set
        return


    def start_plot( self,  ):
        """
        start a plot process: create the figure and axes, set labels,
        title and y limits
        no data yet
        return nothing
        """

        logit ( "start_plot() = " + self.plot_title )
        self.lines_ix      = 0

        #http:nbviewer.ipython.org/github/jrjohansson/scientific-python-lectures/blob/master/Lecture-4-Matplotlib.ipynb

        self.fig = plt.figure( figsize=(7, 7) )

        # a single subplot, all the lines of this graph go on it
        self.axes = self.fig.add_subplot(111)

        self.axes.set_xlabel( 'Time in days' )
        self.axes.set_ylabel( 'Views' )           # might want to make variable with type of plot

        self.axes.set_title( self.plot_title )

        self.axes.set_ylim([ self.plot_min_y ,  self.plot_max_y ])

        return


    def add_line( self, x, y, alabel ):
        """
        add a line of data in variables x y to the graph
        label ( legend ) alabel
        call start_plot() first, the axes are created there
        logs and draws nothing when y has fewer than 2 points
        logs a warning when data falls outside the plot's y limits
        return nothing
        """

        self.lines_ix      += 1
        if len( y ) <= 1:
            logit( "Not enough data in: " + alabel + " " + " to plot." )
            return

        avalue   = max( y )
        if avalue > self.plot_max_y:
            logit( "Max value in: " + alabel + " " + str( avalue ) + " beyond plot max." )

        avalue   = min( y )
        if avalue < self.plot_min_y:
            logit( "Mininum value in: " + alabel + " " + str( avalue ) + " below plot min." )

        # next ( line, color, marker ) from the rotating style lists
        self.current_style  = self.lineStyle.getStyle()

        self.axes.plot(  x, y, label= alabel, linestyle = self.current_style[0], marker = self.current_style[2],   color = self.current_style[1] )
        # http://www.sagemath.org/doc/reference/plotting/sage/plot/plot.html

        # redo the legend so it includes the line just added
        self.axes.legend( loc=4 )
        return


    def show_plot( self ):
        """
        save the plot to file ( when save_graph ends in .png ) and show it
        does nothing visible when lines_ix <= 0, a figure that was started
        but never used is closed so that it is not left open in pyplot
        ( where it would presumably come up with the next show )
        return nothing
        """

        logit(  "show_plot1()>>" + str( self.lines_ix ) )
        if self.lines_ix <= 0:
            if self.fig is not None:
                plt.close( self.fig )
                self.fig  = None
            return

        # save before show: show seems to destroy the plot, so write the
        # file while the figure is certainly still intact
        if self.save_graph[-4:] == ".png":
            logit( "saving the file " + self.save_graph )
            self.fig.savefig( self.save_graph ) #or use fig.savefig("filename.png", dpi=200)

        # the original passed the figure positionally, where it is taken as
        # the block argument; block=True keeps that effect and is the
        # keyword form current matplotlib requires
        plt.show( block=True )

        self.fig      = None  # cannot use show() again on same plot, use None as a flag
        self.lines_ix = 0

        return


#  -------------- end of class -----------------

def logit( amsg ):
    """
    log amsg ( a string ) to the console and to the log file
    both are flushed right away so messages do not get lost in error
    conditions
    the global logfile must be open before using ( main() opens it )
    return nothing
    """

    global logfile

    logfile.write( amsg + "\n" )
    logfile.flush()      # the docstring always promised this, only stdout was flushed

    print( amsg )        # single argument in parentheses: same output on Python 2 and 3
    sys.stdout.flush()   # in a way optional, but can help get message in right place
    return


def get_args():
    """
    process arguments found on the command line -- if any
    sets the globals data_dir and plot_type ( plot_type defaults to PLOT_TOT )
    note log file not yet open, so errors are printed, not logged
    note: no spaces around = signs on command line
    errors:
        argument without an = sign   -> message and sys.exit( 1 )
        unknown argument name        -> message and sys.exit( 1 )
        unknown graph_type value     -> message only, processing continues
    return nothing
    for graph_views command line might be like:
            python graph_views.py  data_dir=russ_data  graph_type=graph_tot
    """

    global data_dir
    global plot_type

    plot_type   = PLOT_TOT

    for iarg in sys.argv[1:]:
        # partition splits on the first = only: a value may contain =,
        # and a missing = is detected instead of raising IndexError
        parm_name, equal_sign, parm_value  = iarg.partition( "=" )

        if equal_sign == "":
            print( "Error: arg line " + iarg + " is not of the form name=value" )
            sys.exit(1)

        if parm_name == "data_dir":
            data_dir   = parm_value

        elif parm_name == "graph_type":
            plot_type  = parm_value

            if plot_type not in ( PLOT_TOT, PLOT_SLOPE, PLOT_SLOPE_PCT ):
                print( "Error: plot_type = " + parm_value )

        else:
            print( "Error: arg line " + parm_name + "  " + parm_value )
            sys.exit(1)

    return


def read_urllist():
    """
    read all the lines of the url file ( URL_FILE in data_dir ) into the
    list "sites" and return it, line ends are still on the lines
    if the file cannot be opened: prints an error and calls sys.exit( 1 )
    """

    # built outside the try so the name is always there for the message
    sitefn  = os.path.join( data_dir, URL_FILE )
    try:
        with open( sitefn ) as site_file:    # 'r' mode is default, with closes the file
            sites   = site_file.readlines()
    except IOError:
        print( "Error: Couldn't find url site file = " + sitefn )
        sys.exit(1) # to drastic?
    return sites


def convert_to_slope_percent( x, y ):
    """
    convert counts to slopes as percent of the previous value, ie daily
    views as percent
    x, y are parallel sequences of numbers, first point is the baseline
    a point is dropped when:
        its y equals the last used y ( no change )
        its x equals the last used x ( slope undefined ), baseline is kept
        the last used y is 0 ( percent undefined ), point becomes the new baseline
    return the two lists in a tuple ( nx, ny ), both empty when there is
    nothing to work with
    """

    nx    = []
    ny    = []
    if len( x ) == 0 or len( y ) == 0:
        return ( nx, ny )

    lastx = x[0]
    lasty = y[0]
    for thisx, thisy in zip( x[1:], y[1:] ):
        if thisy == lasty:
            continue

        delta_x = thisx - lastx
        if delta_x == 0:
            continue               # same time again, cannot compute a slope

        if lasty != 0:
            # float() keeps integer input from being floor divided in Python 2
            slope      = float( thisy - lasty ) / delta_x
            nx.append( thisx )
            ny.append( ( slope / lasty ) * 100 )

        lasty = thisy
        lastx = thisx
    return ( nx, ny )


def convert_to_slope( x, y ):
    """
    convert counts to slopes, ie daily views
    x, y are parallel sequences of numbers, first point is the baseline
    a point is dropped when:
        its y equals the last used y ( no change )
        its x equals the last used x ( slope undefined ), baseline is kept
    return the two lists in a tuple ( nx, ny ), both empty when there is
    nothing to work with
    """

    nx    = []
    ny    = []
    if len( x ) == 0 or len( y ) == 0:
        return ( nx, ny )

    lastx = x[0]
    lasty = y[0]
    for thisx, thisy in zip( x[1:], y[1:] ):
        if thisy == lasty:
            continue

        delta_x = thisx - lastx
        if delta_x == 0:
            continue               # same time again, cannot compute a slope

        nx.append( thisx )
        # float() keeps integer input from being floor divided in Python 2
        ny.append( float( thisy - lasty ) / delta_x )
        lasty = thisy
        lastx = thisx
    return ( nx, ny )


def plot_file( atplot, afilename, atitle ):
    """
    plot data from a single site ( data file )
    call after plot has been started.
        atplot      the ViewPlot to add the line to
        afilename   data file name without extension, also used as the
                    legend label
        atitle      not used at present
    each data line is:  count, time in ticks
    blank lines are skipped, lines that do not parse are logged with
    their line number and skipped, points whose count does not go up
    are dropped
    a missing file is logged and nothing is plotted
    return nothing
    """

    filename =  os.path.join( data_dir, afilename + COUNT_EXT  )
    logit( "Reading data from > " + filename )
    try:
        with open( filename ) as data_file:     # with closes the file
            lines = data_file.readlines()
    except IOError:
        logit( "Error: Couldn't find data file = " + filename )
        return

    # get data, text strings, and parse and convert to numeric lists
    # also drop items where count does not go up
    x          = []    # time in days
    y          = []    # view count
    last_count = 0.

    for line_no, line in enumerate( lines, 1 ):
        line   = line.strip()
        if line == "":
            continue
        splits = line.split( "," )
        try:
            count    = float( splits[0].strip() )
            xtime    = float( splits[1].strip() )  * ( CNV_TODAYS )
        except ( IndexError, ValueError ):
            # too few fields or not a number: report where and move on
            logit( "Error: bad data in " + filename + " at line = " + str( line_no ) + " >" + line )
            continue

        if  count > last_count:         # suppress counts that do not go up
            x.append( xtime )
            y.append( count )
            last_count = count

    #---------------------------------- plot the data , one of n types
    if   plot_type == PLOT_TOT:
        atplot.add_line( x, y, afilename )

    elif plot_type == PLOT_SLOPE:
        # no data: skip the conversion, add_line() reports the lack of data
        nxy = convert_to_slope( x, y ) if x else ( [], [] )
        atplot.add_line( nxy[0], nxy[1], afilename )

    elif plot_type == PLOT_SLOPE_PCT:
        nxy = convert_to_slope_percent( x, y ) if x else ( [], [] )
        atplot.add_line( nxy[0], nxy[1], afilename )

    return


def xshow_plot():
    """
    ask the current plot to show itself ( ViewPlot.show_plot() decides
    whether there is anything to show ), then replace it with a brand new
    ViewPlot so the global tplot is always a valid, fresh reference
    return nothing
    """

    global tplot

    tplot.show_plot()
    tplot = ViewPlot()

def prog_info():
    """
    log info about program and its argument/environment:
    a banner, program name and version, the command line arguments
    ( or a note that there are none ) and the current directory
    nice to have system time and date
    return nothing
    """

    logit( "" )
    logit( "============================" )
    logit( "" )

    logit( "Ploting with " + PROG_NAME + " version = " + VERSION )
    logit( "" )
    #  data_dir=russ_data  graph_type=graph_tot
    # sys.argv[0] is the program itself, so "no arguments" means at most
    # one entry; the old test for 0 entries could not be true in a normal run
    if len( sys.argv ) <= 1:
        logit( "no command line arg " )
    else:
        for ix_arg, an_arg in enumerate( sys.argv ):
            logit( "command line arg " + str( ix_arg ) + " = " + an_arg )

    logit( "current directory " +  os.getcwd() )
    return


def close_main( amsg ):
    """
    wrap up: push out any plot in progress ( via xshow_plot() ), then log
    amsg unless it is the empty string
    does not exit, the caller has to return from main itself
    return nothing
    """

    xshow_plot()

    if amsg == "":
        return

    logit( amsg )
    return


def main():
    """
    main program overall control here
    the command line is processed, the log file opened, then the url
    file is read and processed line by line:
        blank or # lines         logged, otherwise ignored
        graph_exit...            show plot in progress and stop
        graph_end                show plot in progress
        line of the plot type    show plot in progress, start a new one
        other graph_ lines       show plot in progress if it has lines
        anything else            data line: title, ?, data file name
                                 plotted if a plot has been started
    processing also stops, showing the plot in progress, once more than
    LIMIT data lines have been used
    the log file is closed however main is left
    """

    global  logfile
    global  tplot
    global  url_file_line_ix   # read by ViewPlot.parse_plot_pams for its messages

    get_args()

    lf_name     = os.path.join( data_dir, LOG_FILE )   # should work in all os's
    logfile     = open( lf_name,"a" )                  # opens file for logging

    try:
        prog_info()

        logit( "logging to  " + lf_name )

        tplot  = ViewPlot()
        sites  = read_urllist()

        url_file_line_ix    = 0
        ix                  = 0     # number of data lines used so far
        for isite in sites:
            url_file_line_ix += 1
            isite  = isite.strip()
            splits = isite.split(",")
            split0 = splits[0].strip()

            if isite == "":  # to skip blank lines, not a great way
                isite = "#"
            if isite[0:1] == "#":
                # isite is just a comment or blank line or....
                logit( isite )
                continue

            # was splits[0:10], a list, which can never equal a string,
            # so graph_exit was never acted on
            if split0[0:10] ==  "graph_exit":
                close_main( "hit graph_exit at line = " + str( url_file_line_ix ) )
                return

            elif split0 ==  PLOT_END:
                logit( "hit plot end" )
                xshow_plot()

            elif split0 ==  plot_type:  # show graph when next graph line or eof is reached
                # this is start of a new plot
                xshow_plot()
                tplot.parse_plot_pams( isite )
                tplot.start_plot( )

            elif split0[0:6] ==  "graph_":
                # graph line of a type we are not plotting
                if tplot.lines_ix   > 0:
                    xshow_plot()

            elif tplot.lines_ix   >= 0:

                if len( splits ) < 3:
                    logit( "Not enough parameter in file at line " + str( url_file_line_ix) )
                    continue # skip this url line

                ititle  = splits[0].strip()
                ilfn    = splits[2].strip()      # lfn = log file name
                ix  += 1
                if ix > LIMIT:
                    # show what has been plotted so far instead of dropping it
                    close_main( "hit limit of " +  str( LIMIT ) + " sites" )
                    return
                plot_file( tplot, ilfn, ititle )

        close_main( "" )
        logit( "------------------" + PROG_NAME + "  done -------------------" )
        logit( "" )   # yes i want a blank space
    finally:
        # also reached on the early returns and on sys.exit()
        logfile.close()

if __name__ == '__main__':
    
    
    # The program is intended to run with command line arguments supplied
    # from the development IDE, the command line, or a batch file.
    # If none of these work for you, you can "inject" command arguments by
    # un-commenting a line like this next one:
    # sys.argv = [ "normally filename", "data_dir=test_data", "graph_type=graph_slope"  ]
    
    # to process files in the current directory run with an empty data_dir:
    # sys.argv = [ "normally filename", "data_dir=", "graph_type=graph_tot" ]
    
    
    main()


