#!/usr/bin/env python
# -*- coding:utf8 -*-  
# @Author  : c32 (amd5@qq.com)
# @Blog    : http://www.19aq.com/
# @Version : 
# @DateTime:  2018-4-14 15:50:41
 
import urllib
import urllib2
import MySQLdb
import re

# import sys


# Connection settings for the MySQL database that receives the scraped rows.
# NOTE(review): host, user and password are hard-coded in source -- consider
# moving them to configuration outside the repository.
_DB_SETTINGS = {
    'host': 'c32.19aq.com',
    'user': 'www_yese_com',
    'passwd': 'www_yese_com',
    'db': 'www_yese_com',
    'port': 3306,
    'charset': 'utf8',
}

# The connection is opened as a side effect of loading this module.
conn = MySQLdb.connect(**_DB_SETTINGS)

# Cookie header sent with every authenticated GET request in this script.
# NOTE(review): this is a fixed, captured cookie value (the literal itself
# embeds an "expires=Tue, 13-Mar-2018" attribute) -- confirm it is still valid.
_SESSION_COOKIE = 'YeSeEden=login=OK&lasttime=122%2E190%2E94%2E123&icount%5Ftime=03%2D12+23%3A32&Avatar=0&Recommended=Null&area%5Fb=%CC%EC%BD%F2%CA%D0&cjx=&area%5Fs=%BA%CD%C6%BD%C7%F8&flag=1&truemember=0&grade=1&email=&id=2496224&sex=%AEI&password=&username=pdwl; expires=Tue, 13-Mar-2018 03:32:48 GMT; path=/'
headers = {'cookie': _SESSION_COOKIE}
# session = requests.session()
def main():
    """Entry point: submit the login form and start the crawl.

    Builds the login form fields (basic account information) and hands them,
    together with the login endpoint, to post().

    NOTE(review): the account name and password are hard-coded here.

    Returns:
        Whatever post() returns.
    """
    login_form = {
        'Key_Username': 'c3253220',
        'Key_Password': '3253220',
    }
    return post("https://www.yesejiaoyou.com/sub/Check_login.asp", login_form)


def post(url, data):
    """Log in to the site by POSTing form data, then continue to personal().

    The response body is printed as the login status (the original author's
    note says "1" means a normal login).

    NOTE(review): the cookie-aware opener is local to this call; the other
    functions in this file send the module-level ``headers`` cookie instead,
    so cookies collected here are not reused by them.

    Args:
        url: Login endpoint to POST to.
        data: Mapping of form fields; it is URL-encoded before sending.

    Returns:
        Whatever personal() returns.
    """
    req = urllib2.Request(url)
    form_body = urllib.urlencode(data)
    # Enable cookie handling for this request.
    opener = urllib2.build_opener(urllib2.HTTPCookieProcessor())
    response = opener.open(req, form_body)
    try:
        print(response.read())  # login status
    finally:
        # Release the connection now; otherwise it would stay open for the
        # entire crawl that personal() kicks off below.
        response.close()
    return personal()

def personal():
    """Request the personal-center page, then start listing members.

    The personal-center response body is read and discarded; only the request
    itself is kept.
    NOTE(review): presumably the visit is needed by the site before the
    member listing can be fetched -- confirm.

    Returns:
        Whatever getList() returns for the member listing URL.
    """
    link = 'https://www.yesejiaoyou.com/personal/'
    url = 'https://www.yesejiaoyou.com/member/'
    req = urllib2.Request(link, headers=headers)
    r = urllib2.urlopen(req)
    try:
        r.read()  # body is not used
    finally:
        # Close before the long-running crawl starts so the socket is not
        # held open for its whole duration.
        r.close()
    return getList(url)

def getList(url):
    """Fetch the member listing page and hand its HTML to findPage().

    findPage() extracts the page count from the pager and drives the rest of
    the crawl; its return value is printed.

    Args:
        url: URL of the member listing page, requested with the module-level
            ``headers``.

    Returns:
        None.
    """
    req = urllib2.Request(url, headers=headers)
    r = urllib2.urlopen(req)
    try:
        html = r.read()
    finally:
        # Close right after reading; findPage() below runs the whole crawl
        # and the response would otherwise stay open throughout.
        r.close()
    print('---------getList--------')
    print(findPage(html))

def findPage(html):
    """Extract the last-page number from the pager and crawl up to it.

    Looks for the ">>" pager link, takes the text after the first "=" in its
    href as the page count, and passes it to getUrl(). Pager links whose href
    contains no "=" are skipped instead of raising IndexError.

    Args:
        html: HTML source of a member listing page.

    Returns:
        None.
    """
    items = re.findall('gt;</a> <a href="http://c32.19aq.com/Linux/Python/(.*?)">&gt;&gt;</a> </div>', html, re.S)
    for item in items:
        parts = item.split('=')
        if len(parts) < 2:
            # No "name=value" pair in the href, so there is no page number.
            continue
        page = parts[1]  # page count taken from the query string
        print('--------findPage---------')
        print(getUrl(page))

def findList(html):
    """Extract the member entries from one listing page with a regex.

    The gaps between the captured fields are matched lazily (``*?``), so a
    page holding several entries yields one tuple per entry. A greedy gap
    would span the whole document and return a single tuple built from the
    last entry only.

    Args:
        html: HTML source of a member listing page.

    Returns:
        A list of 4-tuples ``(href_suffix, link_text, pfs4_text, yellow_text)``
        in document order; an empty list when nothing matches.
    """
    pattern = (
        r'<dd class="pf_ddtop">[\w\W]*?'
        r'<span class="pfs1"><a href="http://c32.19aq.com/Linux/Python/(.*?)" target="_blank">(.*?)</a></span>[\w\W]*?'
        r'<span class="pfs4">(.*?)</span>[\w\W]*?'
        r'</dd>[\w\W]*?'
        r'<dd>[\w\W]*?'
        r'<span class="yellow">(.*?)</span>'
    )
    myItems = re.findall(pattern, html, re.S)
    # Progress banners kept from the original debugging output.
    print('--------findList----000-----')
    print('--------findList----111-----')
    print('--------findList----222-----')
    return myItems

def urlPages(pages):
    """Build the member listing URL for a given page number.

    Args:
        pages: Page number; it is converted with str() before being appended.

    Returns:
        The listing URL with ``?page=<pages>`` appended.
    """
    page_url = ''.join(['https://www.yesejiaoyou.com/member/?page=', str(pages)])
    print('--------urlPages---------')
    return page_url

def getListurl(url):
    """Download one listing page and return its raw HTML.

    Args:
        url: Page URL, requested with the module-level ``headers``.

    Returns:
        The response body exactly as read from the server (not decoded).
    """
    req = urllib2.Request(url, headers=headers)
    data = urllib2.urlopen(req)
    try:
        html = data.read()
    finally:
        # Always release the connection, even if reading fails.
        data.close()
    print('--------getListurl---------')
    return html

def getUrl(page):
    """Crawl listing pages 1..page and store every entry in the ``list`` table.

    For each page the HTML is fetched with getListurl(), parsed with
    findList(), and each resulting tuple is inserted as one row:
    ``url`` gets field 0, ``name`` gets field 1 and ``shuxing`` gets field 3.
    Fields 1 and 3 are decoded as GB2312 (undecodable bytes dropped) and
    re-encoded as UTF-8 first. Field 2 is not stored.

    Args:
        page: Last page number to crawl (inclusive); converted with int().

    Returns:
        None.

    Raises:
        ValueError: If ``page`` cannot be converted to an integer.
    """
    last_page = int(page)
    # One cursor for the whole crawl, closed when done, instead of a fresh
    # never-closed cursor per inserted row.
    cur = conn.cursor()
    try:
        for pages in range(1, last_page + 1):
            html = getListurl(urlPages(pages))
            print('--------getUrl----000-----')
            items = findList(html)
            print('--------getUrl----111-----')
            for item in items:
                name = item[1].decode('gb2312', 'ignore').encode('utf-8')
                shuxing = item[3].decode('gb2312', 'ignore').encode('utf-8')
                print('--------getUrl----222----')
                cur.execute(
                    'INSERT INTO list (url, name, shuxing) VALUES (%s, %s, %s)',
                    (item[0], name, shuxing))
                # Commit per row so already-scraped rows survive a later failure.
                conn.commit()
    finally:
        cur.close()



if __name__ == '__main__':
    # Run the crawl and close the module-level DB connection afterwards,
    # even when the crawl aborts with an exception.
    try:
        main()
    finally:
        conn.close()




# def getHtml(url):   #获取html源码
#     page = urllib2.urlopen(url)
#     html = page.read()
#     return html


# html = getHtml(url)
# print html



# test = logina
# print logina(url,userpass,headers)















# req  = urllib2.Request(link)
# page = urllib2.urlopen(req)
# html = page.read()
# print '========================'
# print '你好'
# print html
# page.close()