##############################################################################
#                                                                            #
# kftpupdater - Mirror a local directory to an ftp server or vice versa      #
# Copyright (C) 2000 Martin P. Holland  <m.holland@noether.freeserve.co.uk>  #
#                                                                            #
# This program is free software; you can redistribute it and/or modify       #
# it under the terms of the GNU General Public License as published by       #
# the Free Software Foundation; either version 2 of the License, or          #
# (at your option) any later version.                                        #
#                                                                            #
# This program is distributed in the hope that it will be useful,            #
# but WITHOUT ANY WARRANTY; without even the implied warranty of             #
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the              #
# GNU General Public License for more details.                               #
#                                                                            #
# You should have received a copy of the GNU General Public License          #
# along with this program; if not, write to the Free Software                #
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA  #
#                                                                            #
##############################################################################

# Useful ideas and examples obtained from
# ftpparse.c (c) D. J. Bernstein 1997, djb@pobox.com

import string
# Flags returned by listparse() as the first element of its result tuple.
_IS_BAD=-1   # not a regular file or directory (devices, ".", "..", junk lines)
_IS_FILE=0   # a regular file or a link
_IS_DIR=1    # a directory

def ismonth(field):
    """
    Return 1 if FIELD is exactly one of the capitalised three-letter
    English month abbreviations ('Jan' ... 'Dec'), otherwise 0.
    The comparison is case sensitive.
    """
    months = ('Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
              'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec')
    return 1 if field in months else 0

def getlink(file):
    """
    Return the name part of a LIST filename field.

    For a symbolic link the field looks like 'name -> target'; everything
    from the first ' -> ' onwards is dropped.  A field without ' -> ' is
    returned unchanged.
    """
    # Use the str method rather than string.split(): the module-level
    # function is deprecated and no longer exists in Python 3.
    return file.split(' -> ')[0]

def ignoredotdot(dirname):
    """
    Classify a directory name taken from a listing line.

    DIRNAME is first passed through getlink().  If the result is "." or
    ".." the entry is rejected as (_IS_BAD, None); otherwise
    (_IS_DIR, name) is returned.
    """
    name = getlink(dirname)
    if name == "." or name == "..":
        return _IS_BAD, None
    return _IS_DIR, name

def listparse(line):
    """
    Parse an output LINE from a LIST command on an ftp server.

    Assumes LINE has had the trailing CR/LF stripped.

    It returns:
    flag, filename
    where flag=_IS_BAD   not a regular file or directory
               _IS_FILE  a regular file or link
               _IS_DIR   a directory
    character/block devices, sockets, fifos, ., .., all fall into IS_BAD
    In the IS_BAD case None is returned for the filename
    In the link case the link name is returned for the filename

    Lines that cannot be recognised (too few fields, unknown layout)
    also give (_IS_BAD, None); no exception is raised for short lines.
    """

# The basic algorithm is that for unix/windows/dos/netware/mac we always have
# * month notmonth notmonth filename (where * = some fields)
# so once we find the month field the filename field is the month field +3
# Further the month field is either the fourth, fifth or sixth field
# although we have to watch out for a group name which could be
# a month and give a false positive.
# Caveat: a device on unix can take the month field into the seventh
# field but we don't care about devices.
# crw-------   1 root   sys       14,   3 Apr 17  1999 /dev/dsp

    fields = line.split()  # split into fields separated by whitespace
    if len(fields) < 6:
        return _IS_BAD, None

    # ismonth() has no side effects, so evaluate the three candidate
    # positions once and reuse the results below.
    month4 = ismonth(fields[3])
    month5 = ismonth(fields[4])
    month6 = ismonth(fields[5])

    if not month4 and not month5 and not month6 and line[0] not in 'bc':
        #4th 5th and 6th fields are not months and it isn't a unix device
        #can only be a vax style line or junk
        #00README.TXT;1       2 30-DEC-1996 17:44 [SYSTEM] (RWED,RWED,RE,RE)
        #CORE.DIR;1           1  8-SEP-1996 16:09 [SYSTEM] (RWE,RWE,RE,RE)
        #this will cope with spaces in the filename. If that's not possible
        #a simpler routine can be used
        parts = line.split(";", 1)  # split into before and after ;
        if len(parts) == 1:
            return _IS_BAD, None
        #make sure we have the right number of fields
        aftersemi = parts[1].split()
        if len(aftersemi) != 6:
            return _IS_BAD, None
        #belt and braces: check for month (all twelve, including OCT)
        date = aftersemi[2].split('-', 2)
        vmsmonths = ('JAN', 'FEB', 'MAR', 'APR', 'MAY', 'JUN',
                     'JUL', 'AUG', 'SEP', 'OCT', 'NOV', 'DEC')
        if len(date) == 3 and date[1] in vmsmonths:
            filename = parts[0]
            if filename[-4:] == ".DIR":
                return _IS_DIR, filename
            return _IS_FILE, filename
        return _IS_BAD, None

    #must be >=6 fields at this point and must be unix, dos/windows, netware,
    #mac or junk.

    #don't care about sockets, fifos or devices
    if line[0] not in "-dl":
        return _IS_BAD, None

    if month4 and not month5 and not month6 and fields[0][0] == 'd':
        #This case is a directory on a NetPresentz server (mac)
        #"drwxrwxr-x                folder                2 May 10          1996 network"

        #Note that with unix style listings (those with a hard links
        #field) it is possible to have the fourth field = groupname = a month
        #-rw-r--r--   1 Feb      Jan         0 Jun 10 17:39 foo
        #but then the sixth field (the real month field) will be a month too
        #and the if check rejects that.

        #A line with only six fields has no name after the date: reject it
        #instead of indexing past the end of the split result.
        if len(fields) < 7:
            return _IS_BAD, None

        #Now we get rid of the first six fields. What's left is the dir name
        #possibly with spaces.
        return ignoredotdot(line.split(None, 6)[6])

    if month5 and len(fields) >= 8:
        #fifth field is a month (unix style with no hard links or Netware or
        #NetPresentz file)
        #-rw-r--r--     amb      hackers         0 Jun 10 17:39 foo
        #drw-r--r--     amb      hackers         0 Jun 10 17:39 foo
        #d [R----F--] supervisor             512       Jan 16 18:53    login
        #- [R----F--] rhesus              214059       Oct 20 15:27    cx.exe
        #-------r--         326  1391972  1392298 Nov 22  1995 MegaPhone.sit
        filename = getlink(line.split(None, 7)[7])
    elif month6 and len(fields) >= 9:
        #sixth field is a month (unix style or windows/dos)
        filename = getlink(line.split(None, 8)[8])
    else:
        return _IS_BAD, None

    if line[0] == "d":
        return ignoredotdot(filename)
    return _IS_FILE, filename
        
def main():
    """
    Self-test: run listparse() over a set of sample LIST lines and print
    each line, the (flag, filename) result and a blank separator line.
    """
    #the first example is taken from the ftpparse.c list. I wonder if it
    #is a typo actually. Either the group is 512 and the size is
    #missing or the group is missing. In any case the routine copes
    #with it but if it is possible for say, the inode field to be
    #missing and _simultaneously_ the group or size to be missing
    #then my routine would fail. The second one is an example of this
    #
    examples=(
    "dr-xr-xr-x   2 root      512 Apr          8  1994 etc",
    "-rw-r--r--     amb      hackers          Jun 10 17:39 foo",
    "d [R----F--] supervisor             512       Jan 16 18:53    login",
    "- [R----F--] rhesus              214059       Oct 20 15:27    cx.exe",
    "-------r--          326  1391972          1392298 Nov 22          1995 MegaPhone.sit",
    "drwxrwxr-x                folder                2 May 10          1996 network",
    "drwxrwxr-x   2 martin   martin       1024 Feb 23 17:21 .",
    "drwxr-xr-x 140 martin   martin       8192 Feb 23 16:12 ..",
    "crw-------   1 root   sys       14,   3 Apr 17  1999 /dev/dsp",
    "total 1",
    "-rw-r--r--     amb      hackers         0 Jun 10 17:39 foo",
    "-rw-r--r--   1 amb      hackers         0 Jun 10 17:39 foo -> bar",
    "-rw-r--r--   1 amb      hackers         0 Jun 10 17:39 foo",
    "-rw-r--r--     amb      hackers         0 Jun 10 17:39 foo -> bar",
    "drwx------   2 pm1mph   pm           512 Aug  8  1999 .ncftp",
    "drwxr-xr-x   2 unknown unknown     4096 Aug 30 18:35 images",
    "-rw-r--r--     amb      hackers         0 Jun 10 17:39 foo bar baz",
    "-rw-r--r--   1 amb      hackers         0 Jun 10 17:39 foo bar baz",
    "-rw-r--r--   1 amb      hackers         0 Jun 10 17:39 foo bar baz -> alpha beta",
    "CORE.DIR;1           1  8-SEP-1996 16:09 [SYSTEM] (RWE,RWE,RE,RE)",
    "CII-MANUAL.TEX;1   213/216 29-JAN-1996 03:33:12  [ANONYMOU,ANONYMOUS]   (RWED,RWED,,)",
    "-rw-r--r--     Jan      Feb         0 Jun 10 17:39 foo bar baz",
    "-rw-r--r--   1 Feb      Jan         0 Jun 10 17:39 foo bar baz",
    "drw-r--r--   1 Feb      Jan         0 Jun 10 17:39 foo bar baz",
    "Total of 11 Files, 10966 Blocks.",
    "DISK$ANONFTP:[ANONYMOUS]",
    "Directory DISK$PCSA:[ANONYM]",
    "-zero one two three four Jan six seven",
    "-zero one two three Jan five six"
    )
    # Single-argument parenthesised print gives the same output whether it
    # is parsed as a print statement (Python 2) or a function call (Python 3).
    for line in examples:
        print(line)
        print(listparse(line))
        print("")
# When executed as a script (rather than imported), run the example lines
# through the parser and print the results.
if __name__=="__main__":
    main()
