#! /usr/bin/python
# -*- coding: utf-8 -*- #
#
# @author: G1yuK
# @url: twitter.com/G1yuK
# Parser for otvet.mail.ru
#

import re
import urllib2
import os
import sys
import getopt

parse_mode = 1
# 0 - walk through individual question pages, 1 - walk through the main answer listing pages
depth_pages = 100        # Crawl depth: how many consecutive page numbers to visit
name_fout = 'emails.txt' # Output file name
s = 1                    # Page/question number the crawl starts from

# Base URL; the '/?pg=<n>' or '/question/<n>' path is appended to it.
need_link = 'http://otvet.mail.ru'

def banner():
    """Clear the terminal and print the script's usage banner."""
    # "clear" on POSIX systems, "cls" on everything else (e.g. Windows).
    clear_command = "clear" if os.name == "posix" else "cls"
    os.system(clear_command)

    usage_text = """
#############################################################
#                                                           #
# otvets_parser.py              c0der: twitter.com/G1yuK    #
#                                                           #
#                                                           #
# Options:                                                  #
#         -n depth of parsing(default: 100)                 #
#         -p parse mode, 0 or 1 (default: 1)                #
#         -o outfile name (default: emails.txt)             #
#         -s from start                                     #
#                                                           #
# Thanks: www.gfs-team.ru && www.xaknet.ru                  #
#                                                           #
#############################################################

"""
    print(usage_text)

def parse_otvet(urlka):
    """Download a page and collect the e-mail addresses linked from it.

    The page body is scanned for ``href="#url?to=<local>@<domain>"``
    attributes and every match is re-assembled as ``<local>@<domain>``.

    Args:
        urlka: URL of the page to download.

    Returns:
        A list of address strings in the order they appear on the page
        (duplicates are kept; the list is empty when nothing matches), or
        ``False`` when the download raises ``urllib2.URLError`` (the URL and
        the error are printed in that case).
    """
    mail_re = re.compile(r'href=\"#url\?to=(.*?)@(.*?)\"')
    try:
        response = urllib2.urlopen(urlka)
        try:
            all_page = response.read()
        finally:
            # Release the connection even if reading fails part-way.
            response.close()
    except urllib2.URLError as e:
        print(urlka + '\t' + str(e) + '\n')
        return False
    # Each match is a (local-part, domain) tuple; glue the halves back together.
    return [local + '@' + domain for local, domain in mail_re.findall(all_page)]

if __name__ == "__main__":
    # Script entry point: read the command-line options, crawl the requested
    # range of pages and write the unique addresses found to the output file.
    banner()

    try:
        # NOTE: the long options are declared without a trailing "=", so
        # getopt accepts them only as bare flags; the code below reads the
        # short forms (-n, -p, -s, -o) exclusively.
        opt, args = getopt.getopt(
            sys.argv[1:], "n:p:s:o:",
            ['depth_pages', 'parse_mode', 's', 'output_file'])
    except getopt.GetoptError as err:
        # Without parsed options there is nothing sensible to run; falling
        # through would hit an undefined `options` below.
        print(err)
        sys.exit(-1)
    options = dict(opt)

    if "-n" in options:
        try:
            depth_pages = int(options["-n"])
            if depth_pages < 0:
                raise ValueError
        except ValueError:
            print("")
            print("[!] Error in \"-n\"")
            print("")
            sys.exit(-1)

    if "-p" in options:
        try:
            parse_mode = int(options["-p"])
            if parse_mode != 0 and parse_mode != 1:
                raise ValueError
        except ValueError:
            print("")
            print("[!] Error in \"-p\"")
            print("")
            sys.exit(-1)

    if "-s" in options:
        # Validate like -n/-p instead of dying with a traceback on bad input.
        try:
            s = int(options["-s"])
        except ValueError:
            print("")
            print("[!] Error in \"-s\"")
            print("")
            sys.exit(-1)

    if "-o" in options:
        name_fout = options["-o"]

    # Open the output file before crawling so an unwritable path fails fast.
    try:
        fout = open(name_fout, 'w')
    except IOError as e:
        print(e)
        sys.exit(-1)

    result = []
    # `with` guarantees the file is closed even if the crawl raises.
    with fout:
        for i in xrange(s, s + depth_pages):
            if parse_mode:
                page_url = need_link + '/?pg=' + str(i)
            else:
                page_url = need_link + '/question/' + str(i)
            found = parse_otvet(page_url)
            # parse_otvet() returns False when the download fails; skip such
            # pages instead of passing a non-iterable to extend().
            if found:
                result.extend(found)
        # De-duplicate, and sort so repeated runs produce the same file.
        for elem in sorted(set(result)):
            fout.write(elem + '\n')
    raw_input('Finish.\nPress Enter...')
