1 """
2 Module for parsing FTP data.
3
4 Currently, this module contains classes for parsing FTP ``LIST`` command
5 output from a variety of FTP servers. In the future, this module may be
6 extended to handle other FTP parsing chores. (Or not.)
7
8 The FTP ``LIST`` parsing logic was adapted for Python from D. J. Bernstein's
9 ``ftpparse.c`` library. See http://cr.yp.to/ftpparse.html. The logic in this
10 module is functionally similar to Bernstein's parser, with the following
11 differences:
12
13 - Bernstein's C-specific logic has been made more Python-like.
14 - The basic parser is encapsulated inside an `FTPListDataParser` class,
15 instead of a function.
16 - The ``ID_TYPE`` and ``MTIME_TYPE`` values are enumerations.
17 - ``SIZE_TYPE`` is not supported (since it was always being set to the
18 same value anyway).
19
20 Currently covered formats:
21
22 - `EPLF`_
23 - UNIX *ls*, with or without group ID
24 - Microsoft FTP Service
25 - Windows NT FTP Server
26 - VMS
27 - WFTPD
28 - NetPresenz (Mac)
29 - NetWare
30 - MSDOS
31 - MLST
32
33 .. _EPLF: http://cr.yp.to/ftp/list/eplf.html
34
35 Definitely not covered:
36
37 - Long VMS filenames, with information split across two lines.
38 - NCSA Telnet FTP server. Has LIST = NLST (and bad NLST for directories).
39 """
40
41 __docformat__ = 'restructuredtext en'
42
43
44
45
46
47 import time
48 from enum import Enum
49 import calendar
50 from ftplib import error_perm
51
52
53
54
55
56 __all__ = ['parse_ftp_list_line',
57 'FTPListData',
58 'FTPListDataParser',
59 'FTPMlstDataParser',
60 'ID_TYPE',
61 'MTIME_TYPE']
62
63
64
65
66
# Lower-case three-letter month abbreviations; position + 1 is the month
# number.
MONTHS = ('jan', 'feb', 'mar', 'apr', 'may', 'jun',
          'jul', 'aug', 'sep', 'oct', 'nov', 'dec')


def _make_enum(name, *keys):
    """
    Build an enumeration whose members are ``keys``, in the order given.

    Two incompatible ``Enum`` APIs may be behind ``from enum import Enum``:

    - the standard library ``enum`` module (or its ``enum34`` backport),
      whose functional form is ``Enum(type_name, member_names)``;
    - presumably the legacy PyPI ``enum`` package, whose constructor takes
      only the member names (``Enum('A', 'B')``), which is the form this
      module was originally written against.

    :Parameters:
        name : str
            Name for the enumeration type (used by the standard API only)
        keys : str
            Member names

    :return: An enumeration exposing each key as an attribute
    """
    if hasattr(Enum, '__members__'):
        # Standard-library style: the first argument is the type name.
        return Enum(name, list(keys))
    # Legacy style: every positional argument is a member name.
    return Enum(*keys)


MTIME_TYPE = _make_enum('MTIME_TYPE',
                        'UNKNOWN', 'LOCAL', 'REMOTE_MINUTE', 'REMOTE_DAY')
"""
``MTIME_TYPE`` identifies how a modification time ought to be interpreted
(assuming the caller cares).

    - ``LOCAL``: Time is local to the client, granular to (at least) the minute
    - ``REMOTE_MINUTE``: Time is local to the server and granular to the minute
    - ``REMOTE_DAY``: Time is local to the server and granular to the day.
    - ``UNKNOWN``: Time's locale is unknown.
"""

ID_TYPE = _make_enum('ID_TYPE', 'UNKNOWN', 'FULL')
"""
``ID_TYPE`` identifies how a file's identifier should be interpreted.

    - ``FULL``: The ID is known to be complete.
    - ``UNKNOWN``: The ID is not set or its type is unknown.
"""

# Snapshots taken once, when the module is imported.
now = time.time()
current_year = time.localtime().tm_year
95
96
97
98
99
class FTPListData(object):
    """
    The `FTPListDataParser` class's ``parse_line()`` method returns an
    instance of this class, capturing the parsed data.

    :IVariables:
        raw_line : str
            The unparsed line this object was built from
        name : str
            The name of the file, if parsable
        try_cwd : bool
            ``True`` if the entry might be a directory (i.e., the caller
            might want to try an FTP ``CWD`` command), ``False`` if it
            cannot possibly be a directory.
        try_retr : bool
            ``True`` if the entry might be a retrievable file (i.e., the caller
            might want to try an FTP ``RETR`` command), ``False`` if it
            cannot possibly be a file.
        size : int
            The file's size, in bytes
        mtime : int
            The file's modification time, as a value that can be passed to
            ``time.localtime()``.
        mtime_type : `MTIME_TYPE`
            How to interpret the modification time. See `MTIME_TYPE`.
        id : str
            A unique identifier for the file. The unique identifier is unique
            on the *server*. On a Unix system, this identifier might be the
            device number and the file's inode; on other systems, it might
            be something else. It's also possible for this field to be ``None``.
        id_type : `ID_TYPE`
            How to interpret the identifier. See `ID_TYPE`.
    """

    def __init__(self, raw_line):
        """
        Create an empty record for ``raw_line``; the parsers fill in the
        remaining fields as they recognize them.

        :Parameters:
            raw_line : str
                The listing line, exactly as received
        """
        self.raw_line = raw_line
        self.name = None
        self.try_cwd = False
        self.try_retr = False
        self.size = 0
        self.mtime_type = MTIME_TYPE.UNKNOWN
        self.mtime = 0
        self.id_type = ID_TYPE.UNKNOWN
        self.id = None

    def __repr__(self):
        # Debugging aid only: partially-filled results are common, so show
        # the fields that tell a complete parse from an incomplete one.
        return '%s(name=%r, size=%r, mtime=%r, try_cwd=%r, try_retr=%r)' % (
            type(self).__name__, self.name, self.size, self.mtime,
            self.try_cwd, self.try_retr)
142
class FTPListDataParser(object):
    """
    An ``FTPListDataParser`` object can be used to parse one or more lines
    that were retrieved by an FTP ``LIST`` command that was sent to a remote
    server.
    """

    def parse_line(self, ftp_list_line):
        """
        Parse a line from an FTP ``LIST`` command.

        The first character selects the format: ``+`` is EPLF, one of
        ``bcdlps-`` is Unix-style *ls* output, a line containing ``;`` is
        treated as MultiNet/VMS, and a leading digit as MS-DOS.

        :Parameters:
            ftp_list_line : str
                The line of output

        :rtype: `FTPListData`
        :return: An `FTPListData` object describing the parsed line, or
                 ``None`` if the line could not be parsed. Note that it's
                 possible for this method to return a partially-filled
                 `FTPListData` object (e.g., one without a name); fields
                 whose text is malformed are simply left at their defaults.
        """
        buf = ftp_list_line

        # Every supported format needs at least two characters.
        if len(buf) < 2:
            return None

        c = buf[0]
        if c == '+':
            return self._parse_EPLF(buf)

        if c in 'bcdlps-':
            return self._parse_unix_style(buf)

        i = buf.find(';')
        if i > 0:
            return self._parse_multinet(buf, i)

        if c in '0123456789':
            return self._parse_msdos(buf)

        return None

    # ------------------------------------------------------------------
    # Time helpers
    # ------------------------------------------------------------------

    def _guess_time(self, month, mday, hour=0, minute=0):
        """
        Pick a year for a date that was listed without one.

        Years are tried in order, starting with last year; the first one
        whose timestamp is less than 350 days before "now" wins. "Now" is
        read on every call so the guess stays right in long-running
        processes.

        :return: The guessed timestamp, or 0 if no year in range qualifies
        """
        current_time = time.time()
        this_year = time.localtime(current_time).tm_year

        for year in range(this_year - 1, this_year + 100):
            t = self._get_mtime(year, month, mday, hour, minute)
            if (current_time - t) < (350 * 86400):
                return t

        return 0

    def _get_mtime(self, year, month, mday, hour=0, minute=0, second=0):
        """
        Convert broken-down time fields to a timestamp with
        ``time.mktime()``, letting the platform decide whether DST applies.
        """
        return time.mktime((year, month, mday, hour, minute, second, 0, 0, -1))

    def _get_month(self, buf):
        """
        Map a three-letter month abbreviation (any case) to 1..12.

        :return: The month number, or -1 if ``buf`` is not a month name
        """
        if len(buf) == 3:
            lowered = buf.lower()
            for number, abbreviation in enumerate(MONTHS, 1):
                if lowered.startswith(abbreviation):
                    return number
        return -1

    # ------------------------------------------------------------------
    # Format-specific parsers
    # ------------------------------------------------------------------

    def _parse_EPLF(self, buf):
        """
        Parse an EPLF line: ``+`` followed by comma-terminated facts, a tab,
        and the name, e.g. ``+i9872342.32142,m1229473595,/,\\tpub``.

        Facts with a non-numeric size or time are skipped.
        """
        result = FTPListData(buf)

        i = 1  # index of the first character of the current fact
        for j in range(1, len(buf)):
            if buf[j] == '\t':
                # Everything after the tab is the name.
                result.name = buf[j+1:]
                break

            if buf[j] == ',':
                c = buf[i]
                value = buf[i+1:j]
                if c == '/':
                    result.try_cwd = True
                elif c == 'r':
                    result.try_retr = True
                elif c == 's':
                    try:
                        result.size = int(value)
                    except ValueError:
                        pass
                elif c == 'm':
                    try:
                        mtime = int(value)
                    except ValueError:
                        pass
                    else:
                        result.mtime_type = MTIME_TYPE.LOCAL
                        result.mtime = mtime
                elif c == 'i':
                    result.id_type = ID_TYPE.FULL
                    # The id is the whole fact value, up to the comma.
                    result.id = value

                i = j + 1

        return result

    def _parse_unix_style(self, buf):
        """
        Parse *ls*-style output (Unix, Windows NT, NetWare, NetPresenz),
        e.g. ``-rw-r--r-- 1 root other 531 Jan 29 03:26 README``.

        The line is walked token by token with a small state machine:

        - states 1-3 skip leading columns (a six-character token starting
          with ``f`` in the second column, as in NetPresenz's ``folder``,
          jumps straight to state 4);
        - state 4 takes a tentative size;
        - state 5 looks for a month name, treating anything else as a
          newer tentative size;
        - state 6 reads the day of the month;
        - state 7 reads either ``H:MM``/``HH:MM`` or a year, after which
          the rest of the line is the name;
        - state 8 ignores what remains.

        A token that cannot be converted ends the scan and leaves the
        result partially filled.
        """
        result = FTPListData(buf)

        buflen = len(buf)
        c = buf[0]
        if c == 'd':
            result.try_cwd = True
        elif c == '-':
            result.try_retr = True
        elif c == 'l':
            # A link may lead to either a file or a directory.
            result.try_retr = True
            result.try_cwd = True

        state = 1
        i = 0       # start of the token currently being scanned
        size = 0    # stays 0 when no size column is recognized
        month = mday = -1
        for j in range(1, buflen):
            # Only the first blank after a token ends that token.
            if (buf[j] != ' ') or (buf[j - 1] == ' '):
                continue

            token = buf[i:j]
            if state == 1:
                state = 2

            elif state == 2:
                state = 3
                if (len(token) == 6) and (token[0] == 'f'):
                    state = 4

            elif state == 3:
                state = 4

            elif state == 4:
                try:
                    size = int(token)
                except ValueError:
                    pass
                state = 5

            elif state == 5:
                month = self._get_month(token)
                if month >= 0:
                    state = 6
                else:
                    try:
                        size = int(token)
                    except ValueError:
                        pass

            elif state == 6:
                try:
                    mday = int(token)
                except ValueError:
                    break
                state = 7

            elif state == 7:
                try:
                    if (len(token) == 4) and (token[1] == ':'):
                        hour = int(token[0])
                        minute = int(token[2:4])
                        mtime = self._guess_time(month, mday, hour, minute)
                        mtime_type = MTIME_TYPE.REMOTE_MINUTE
                    elif (len(token) == 5) and (token[2] == ':'):
                        hour = int(token[0:2])
                        minute = int(token[3:5])
                        mtime = self._guess_time(month, mday, hour, minute)
                        mtime_type = MTIME_TYPE.REMOTE_MINUTE
                    elif len(token) >= 4:
                        year = int(token)
                        mtime = self._get_mtime(year, month, mday)
                        mtime_type = MTIME_TYPE.REMOTE_DAY
                    else:
                        break
                except (ValueError, OverflowError):
                    break

                result.mtime_type = mtime_type
                result.mtime = mtime
                result.name = buf[j+1:]
                state = 8

            # state 8: nothing left to extract

            # Move to the start of the next token.
            i = j + 1
            while (i < buflen) and (buf[i] == ' '):
                i += 1

        result.size = size

        if result.name is not None:
            if c == 'l':
                # Keep the link's own name; drop the " -> target" part.
                result.name = result.name.partition(' -> ')[0]

            # Listings whose second character is a blank or "[" (e.g.
            # NetWare's "d [R----F--] ...") may pad the name.
            if (buf[1] == ' ') or (buf[1] == '['):
                if len(result.name) > 3:
                    result.name = result.name.strip()

        return result

    def _parse_multinet(self, buf, i):
        """
        Parse a MultiNet/VMS line such as
        ``CORE.DIR;1 1 8-NOV-1999 07:02 [SYSTEM] (RWED,RWED,RE,RE)``.

        :Parameters:
            buf : str
                The line of output
            i : int
                Index of the first ``;`` in ``buf``; the name is everything
                before it

        The name and the file/directory flags are always set. The time is
        set only if the date and time columns can be read; seconds, when
        present, are ignored.
        """
        result = FTPListData(buf)
        result.name = buf[:i]
        buflen = len(buf)

        if (i > 4) and (buf[i-4:i] == '.DIR'):
            result.name = result.name[:-4]
            result.try_cwd = True

        if not result.try_cwd:
            result.try_retr = True

        try:
            # Skip the version and size columns.
            i = buf.index(' ', i)
            i = _skip(buf, i, ' ')
            i = buf.index(' ', i)
            i = _skip(buf, i, ' ')

            j = buf.index('-', i)
            mday = int(buf[i:j])

            j = _skip(buf, j, '-')
            i = j
            j = buf.index('-', j)
            month = self._get_month(buf[i:j])
            if month < 0:
                raise IndexError

            j = _skip(buf, j, '-')
            i = j
            j = buf.index(' ', j)
            year = int(buf[i:j])

            j = _skip(buf, j, ' ')
            i = j

            j = buf.index(':', j)
            hour = int(buf[i:j])
            j = _skip(buf, j, ':')
            i = j

            while (buf[j] != ':') and (buf[j] != ' '):
                j += 1
                if j == buflen:
                    raise IndexError

            minute = int(buf[i:j])

            mtime = self._get_mtime(year, month, mday, hour, minute)
            result.mtime_type = MTIME_TYPE.REMOTE_MINUTE
            result.mtime = mtime

        except (IndexError, ValueError, OverflowError):
            # str.index() and int() report failure with ValueError, the
            # scanning helpers with IndexError; either way the time columns
            # are unreadable and the result stays partially filled.
            pass

        return result

    def _parse_msdos(self, buf):
        """
        Parse an MS-DOS style line such as
        ``04-27-00 09:09PM <DIR> licensed`` or
        ``04-14-99 03:47PM 589 readme.htm``.

        Two-digit years below 50 are taken as 20xx, other years below 1000
        as 19xx. Times use a 12-hour clock: 12:xxAM is hour 0 and 12:xxPM
        is hour 12. If any column cannot be read, the fields parsed so far
        are returned.
        """
        result = FTPListData(buf)
        buflen = len(buf)
        i = 0
        j = 0

        try:
            j = buf.index('-', j)
            month = int(buf[i:j])

            j = _skip(buf, j, '-')
            i = j
            j = buf.index('-', j)
            mday = int(buf[i:j])

            j = _skip(buf, j, '-')
            i = j
            j = buf.index(' ', j)
            year = int(buf[i:j])
            if year < 50:
                year += 2000
            elif year < 1000:
                year += 1900

            j = _skip(buf, j, ' ')
            i = j
            j = buf.index(':', j)
            hour = int(buf[i:j])
            j = _skip(buf, j, ':')
            i = j
            while buf[j] not in 'AP':
                j += 1
                if j == buflen:
                    raise IndexError
            minute = int(buf[i:j])

            # buf[j] is now "A" or "P".
            if buf[j] == 'P':
                if hour < 12:
                    hour += 12
            elif hour == 12:
                hour = 0
            j += 1
            if j == buflen:
                raise IndexError

            if buf[j] == 'M':
                j += 1
                if j == buflen:
                    raise IndexError

            j = _skip(buf, j, ' ')
            if buf[j] == '<':
                # "<DIR>" column: a directory, which has no size.
                result.try_cwd = True
                j = buf.index(' ', j)
            else:
                i = j
                j = buf.index(' ', j)

                result.size = int(buf[i:j])
                result.try_retr = True

            j = _skip(buf, j, ' ')

            result.name = buf[j:]
            mtime = self._get_mtime(year, month, mday, hour, minute)
            result.mtime_type = MTIME_TYPE.REMOTE_MINUTE
            result.mtime = mtime
        except (IndexError, ValueError, OverflowError):
            # A missing separator or a non-numeric column: keep whatever
            # was parsed before the problem.
            pass

        return result
505
class FTPMlstDataParser(object):
    """
    An ``FTPMlstDataParser`` object can be used to parse one or more lines
    that were retrieved by an FTP ``MLST`` or ``MLSD`` command that was sent
    to a remote server.

    Contributed by Andrew Scheller <gcode@loowis.durge.org>.
    """

    def parse_line(self, ftp_list_line):
        """
        Parse a line from an FTP ``MLST`` or ``MLSD`` command.

        The text before the first space is read as ``;``-terminated
        ``name=value`` facts; the text after it is the file name. The
        ``unique``, ``modify``, ``size``, ``sizd`` and ``type`` facts are
        understood; any other fact, and any fact whose value is malformed,
        is ignored.

        :Parameters:
            ftp_list_line : str
                The line of output

        :rtype: `FTPListData`
        :return: An `FTPListData` object describing the parsed line. Note
                 that it's possible for this method to return a
                 partially-filled `FTPListData` object (e.g., one without
                 a mtime).
        """
        result = FTPListData(ftp_list_line)

        facts, _, name = ftp_list_line.partition(' ')
        result.name = name

        # A fact list always ends with ";"; an empty one (a line starting
        # with a space, or an empty line) has nothing to parse.
        if not facts.endswith(';'):
            return result

        for fact in facts[:-1].split(';'):
            factname, _, factvalue = fact.partition('=')
            factname = factname.lower()

            if factname == 'unique':
                if factvalue in ('0g0', '0g1'):
                    # NOTE(review): these two values are deliberately not
                    # used as ids, presumably because some server reports
                    # them for every entry; confirm.
                    result.id_type = ID_TYPE.UNKNOWN
                else:
                    result.id_type = ID_TYPE.FULL
                    result.id = factvalue

            elif factname == 'modify':
                # YYYYMMDDHHMMSS; anything after the seconds is ignored.
                try:
                    mtime = calendar.timegm((int(factvalue[0:4]),
                                             int(factvalue[4:6]),
                                             int(factvalue[6:8]),
                                             int(factvalue[8:10]),
                                             int(factvalue[10:12]),
                                             int(factvalue[12:14]),
                                             0, 0, 0))
                except ValueError:
                    continue
                result.mtime_type = MTIME_TYPE.LOCAL
                result.mtime = mtime

            elif factname in ('size', 'sizd'):
                try:
                    result.size = int(factvalue)
                except ValueError:
                    pass

            elif factname == 'type':
                entry_type = factvalue.lower()
                if entry_type == 'file':
                    result.try_retr = True
                elif entry_type in ('dir', 'cdir', 'pdir'):
                    result.try_cwd = True
                else:
                    # Unrecognized type: let the caller try both.
                    result.try_retr = True
                    result.try_cwd = True

        return result
574
575
576
577
578
def parse_ftp_list_line(ftp_list_line, is_mlst=False):
    """
    Convenience function that instantiates an `FTPListDataParser` object
    or an `FTPMlstDataParser` object (depending on the setting of
    ``is_mlst``) and passes ``ftp_list_line`` to the object's
    ``parse_line()`` method, returning the result.

    :Parameters:
        ftp_list_line : str
            The line of output
        is_mlst : bool
            ``True`` if the line came from an FTP ``MLST``/``MLSD`` command;
            ``False`` if it came from ``LIST``.

    :rtype: `FTPListData`
    :return: An `FTPListData` object describing the parsed line, or
             ``None`` if the line could not be parsed. Note that it's
             possible for this method to return a partially-filled
             `FTPListData` object (e.g., one without a name).
    """
    parser_class = FTPMlstDataParser if is_mlst else FTPListDataParser
    return parser_class().parse_line(ftp_list_line)
603
def ftp_FEAT(ftp):
    """
    Issue the FTP FEAT command to an FTP server, and return the resulting
    feature list as a dictionary.

    Each feature line of the reply (a line starting with a single space)
    becomes one entry: the upper-cased feature name maps to the rest of
    the line, or to an empty string if the feature has no parameters.
    Reading stops at the first line that is not a feature line.

    Adapted from code by Andrew Scheller <gcode@loowis.durge.org>.

    :Parameters:
        ftp : open `FTP` object from Python's `ftplib`.

    :rtype: `dict`
    :return: A dict of features, or an empty dict if either (a) the
             remote server supports no extra features, or (b) doesn't
             support the FTP FEAT command.
    """
    features = dict()
    try:
        response = ftp.sendcmd("FEAT")
    except error_perm:
        # The server rejected the command: report no features.
        return features

    if response[:3] == "211":
        # The first line is the reply header, not a feature.
        for line in response.splitlines()[1:]:
            # The closing "211 ..." line, an empty line, or anything else
            # that is not indented ends the feature list.
            if line.startswith("211") or not line.startswith(' '):
                break
            name, _, parameters = line[1:].partition(' ')
            features[name.upper()] = parameters

    return features
635
637 """
638 Convenience function to determine whether a remote FTP server supports
639 the MLST command or not. This method uses the FTP "FEAT" command to
640 query the capabilities of the server.
641
642 Adapted from code by Andrew Scheller <gcode@loowis.durge.org>.
643
644 :Parameters:
645 ftp : open `FTP` object from Python's `ftplib`.
646
647 :rtype: `bool`
648 :return: `true` if the server supports MLST; `false`, otherwise.
649 """
650 return 'MLST' in ftp_FEAT(ftp)
651
652
653
654
655
657 while s[i] == c:
658 i += 1
659 if i == len(s):
660 raise IndexError
661 return i
662
663
664
665
666
if __name__ == '__main__':
    # Self-test: parse one sample line per supported LIST format and
    # compare the result with hand-written expectations.

    def eplf_time(timestamp):
        """
        Expected time tuple for an EPLF ``m`` fact. EPLF carries an
        absolute timestamp, so the expectation is derived from it rather
        than from a wall-clock time that holds in one time zone only.
        """
        return tuple(time.localtime(timestamp))

    def recent(month, mday, hour, minute):
        """
        Expected time tuple for an *ls* date listed without a year: the
        first year, starting with last year, that puts the date less than
        350 days before now (the same rule the parser applies).
        """
        current_time = time.time()
        year = time.localtime(current_time).tm_year - 1
        while True:
            fields = (year, month, mday, hour, minute, 0, 0, 0, -1)
            if current_time - time.mktime(fields) < 350 * 86400:
                return fields
            year += 1

    test_data = [
        # EPLF

        {'line': '+i9872342.32142,m1229473595,/,\tpub',
         'type': 'ELPF',
         'size': 0,
         'time': eplf_time(1229473595),
         'name': 'pub',
         'try_cwd': True},

        {'line': '+i9872342.32142,m1229473595,r,s10376,\tREADME.txt',
         'type': 'ELPF',
         'size': 10376,
         'time': eplf_time(1229473595),
         'name': 'README.txt',
         'try_cwd': False},

        # Unix

        {'line': '-rw-r--r-- 1 root other 531 Jan 29 03:26 README',
         'type': 'Unix',
         'size': 531,
         'time': recent(1, 29, 3, 26),
         'name': 'README',
         'try_cwd': False},

        {'line': 'dr-xr-xr-x 2 root other 512 Apr 8 2003 etc',
         'type': 'Unix',
         'size': 512,
         'time': (2003, 4, 8, 0, 0, 0, 0, 0, -1),
         'name': 'etc',
         'try_cwd': True},

        {'line': '-rw-r--r-- 1 1356107 15000 4356349 Nov 23 11:34 09 Ribbons Undone.wma',
         'type': 'Unix',
         'size': 4356349,
         'time': recent(11, 23, 11, 34),
         'name': '09 Ribbons Undone.wma',
         'try_cwd': False},

        # Windows

        {'line': '---------- 1 owner group 1803128 Jul 10 10:18 ls-lR.Z',
         'type': 'Windows',
         'size': 1803128,
         'time': recent(7, 10, 10, 18),
         'name': 'ls-lR.Z',
         'try_cwd': False},

        {'line': 'd--------- 1 owner group 0 May 9 19:45 foo bar',
         'type': 'Windows',
         'size': 0,
         'time': recent(5, 9, 19, 45),
         'name': 'foo bar',
         'try_cwd': True},

        # NetWare

        {'line': 'd [R----F--] supervisor 512 Jan 16 18:53 login',
         'type': 'NetWare',
         'size': 512,
         'time': recent(1, 16, 18, 53),
         'name': 'login',
         'try_cwd': True},

        # NetPresenz

        {'line': 'drwxrwxr-x folder 2 May 10 1996 bar.sit',
         'type': 'NetPresenz/Mac',
         'size': 2,
         'time': (1996, 5, 10, 0, 0, 0, 0, 0, -1),
         'name': 'bar.sit',
         'try_cwd': True},

        # MultiNet/VMS (no size with these)

        {'line': 'CORE.DIR;1 1 8-NOV-1999 07:02 [SYSTEM] (RWED,RWED,RE,RE)',
         'type': 'MultiNet/VMS',
         'size': 0,
         'time': (1999, 11, 8, 7, 2, 0, 0, 0, -1),
         'name': 'CORE',
         'try_cwd': True},

        {'line': '00README.TXT;1 2 30-DEC-1976 17:44 [SYSTEM] (RWED,RWED,RE,RE)',
         'type': 'MultiNet/VMS',
         'size': 0,
         'time': (1976, 12, 30, 17, 44, 0, 0, 0, -1),
         'name': '00README.TXT',
         'try_cwd': False},

        {'line': 'CII-MANUAL.TEX;1 213/216 29-JAN-1996 03:33:12 [ANONYMOU,ANONYMOUS] (RWED,RWED,,)',
         'type': 'MultiNet/VMS',
         'size': 0,
         # The parser drops the seconds.
         'time': (1996, 1, 29, 3, 33, 0, 0, 0, -1),
         'name': 'CII-MANUAL.TEX',
         'try_cwd': False},

        # MS-DOS

        {'line': '04-27-00 09:09PM <DIR> licensed',
         'type': 'MS-DOS',
         'size': 0,
         'time': (2000, 4, 27, 21, 9, 0, 0, 0, -1),
         'name': 'licensed',
         'try_cwd': True},

        {'line': '11-18-03 10:16AM <DIR> pub',
         'type': 'MS-DOS',
         'size': 0,
         'time': (2003, 11, 18, 10, 16, 0, 0, 0, -1),
         'name': 'pub',
         'try_cwd': True},

        {'line': '04-14-99 03:47PM 589 readme.htm',
         'type': 'MS-DOS',
         'size': 589,
         'time': (1999, 4, 14, 15, 47, 0, 0, 0, -1),
         'name': 'readme.htm',
         'try_cwd': False},
    ]

    def assertEquals(test_value, expected_value, prefix=None):
        """Fail with a readable message unless the two values are equal."""
        error_message = '%s: ' % prefix if prefix else ''
        error_message += 'Expected %s, got %s' % (expected_value, test_value)
        assert test_value == expected_value, error_message

    parser = FTPListDataParser()
    for i, test in enumerate(test_data):
        line = test['line']
        prefix = 'Test %d (%s)' % (i, test['type'])
        print('%s: "%s"' % (prefix, test['name']))
        result = parser.parse_line(line)
        assertEquals(result.raw_line, line, prefix)
        assertEquals(result.size, test['size'], prefix)
        assertEquals(result.name, test['name'], prefix)
        assertEquals(result.try_cwd, test['try_cwd'], prefix)
        expected_time = time.mktime(test['time'])
        assertEquals(time.localtime(result.mtime),
                     time.localtime(expected_time),
                     prefix)
        assertEquals(result.mtime, expected_time, prefix)
813