看世界
 
昨日:篇  今日:篇   总帖:篇   会员:
admin
创始人Lv2   
全部楼盘     
# -*- coding: UTF-8 -*-
import os
import math
import urllib2
import urllib
import re
import time
import random
import sys
sys.path.append("../")
import MySQLdb
from ftplib import FTP
import shutil
import threading
import random
from random import choice



# Root folder for this city's downloads.  Nothing in the visible script reads
# this variable; the scraping loop only prints the literal text "path + ...".
path = "D:\\down\\loupan\\chengdu\\"

# Work list consumed by the scraping loop.  Each entry is
#   [name prefix, page bound, listing URL]
# where the name prefix is written in front of every scraped item, the page
# bound is passed to int() and used as the (exclusive) end of the page range,
# and the second-to-last '/'-separated piece of the URL names the output file.
LS = [
    ['大邑', '2', 'www.anjuke.com/chengdu/cm/dayixian/'],
    ['蒲江', '2', 'www.anjuke.com/chengdu/cm/cdpujiangxian/'],
]

# Reference data kept from earlier runs (not executed).
# Full district list; 'pn' marks entries whose page bound was never filled in:
# LS = [['温江','7','www.anjuke.com/chengdu/cm/wenjiang/'],['青羊','pn','www.anjuke.com/chengdu/cm/qingyang/'],['锦江','pn','www.anjuke.com/chengdu/cm/jinjiang/'],['金牛','pn','www.anjuke.com/chengdu/cm/jinniu/'],['武侯','pn','www.anjuke.com/chengdu/cm/wuhou/'],['成华','pn','www.anjuke.com/chengdu/cm/chenghua/'],['高新区','pn','www.anjuke.com/chengdu/cm/gaoxin/'],['天府新区','pn','www.anjuke.com/chengdu/cm/tainfuxinqu/'],['龙泉驿','pn','www.anjuke.com/chengdu/cm/longquanyi/'],['双流','pn','www.anjuke.com/chengdu/cm/shuangliu/'],['都江堰','pn','www.anjuke.com/chengdu/cm/dujiangyan/'],['郫都','pn','www.anjuke.com/chengdu/cm/piduqu/'],['新都','pn','www.anjuke.com/chengdu/cm/xindu/'],['青白江','pn','www.anjuke.com/chengdu/cm/qingbaijiangqu/'],['新津','pn','www.anjuke.com/chengdu/cm/xinjinxian/'],['金堂','pn','www.anjuke.com/chengdu/cm/jintangxian/'],['彭州','pn','www.anjuke.com/chengdu/cm/pengzhoushi/'],['崇州','pn','www.anjuke.com/chengdu/cm/chongzhoushi/'],['大邑','pn','www.anjuke.com/chengdu/cm/dayixian/'],['邛崃','pn','www.anjuke.com/chengdu/cm/qionglaishi/'],['蒲江','pn','www.anjuke.com/chengdu/cm/cdpujiangxian/'],['简阳','pn','www.anjuke.com/chengdu/cm/jianyangsh/']]
# Earlier partial batches with concrete page bounds:
# LS = [['都江堰','7','www.anjuke.com/chengdu/cm/dujiangyan/'],['郫都','9','www.anjuke.com/chengdu/cm/piduqu/']
# LS = ['新都','8','www.anjuke.com/chengdu/cm/xindu/'],['青白江','3','www.anjuke.com/chengdu/cm/qingbaijiangqu/'],['新津','3','www.anjuke.com/chengdu/cm/xinjinxian/'],['金堂','4','www.anjuke.com/chengdu/cm/jintangxian/'],['彭州','3','www.anjuke.com/chengdu/cm/pengzhoushi/'],['崇州','4','www.anjuke.com/chengdu/cm/chongzhoushi/'],['大邑','3','www.anjuke.com/chengdu/cm/dayixian/'],['邛崃','3','www.anjuke.com/chengdu/cm/qionglaishi/'],['蒲江','2','www.anjuke.com/chengdu/cm/cdpujiangxian/'],['简阳','3','www.anjuke.com/chengdu/cm/jianyangsh/']
# Request headers sent with every page fetch (browser-like Accept/User-Agent).
# Built once instead of on every pass through the district loop.
header = {
    'Accept':'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
    'Accept-Language':'zh-CN,zh;q=0.8',
    'Cache-Control':'no-cache',
    'User-Agent':'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.98 Safari/537.36'
}

# Captures the anchor text of links whose href contains "/chengdu/cm<digit>".
# Presumably that text is the community name -- verify against the live markup.
# The city slug is hard-coded; an earlier variant used "/shanghai/" instead.
_ITEM_RE = re.compile(
    r'/chengdu/cm\d.*?/"\n\s*target="_blank">(.*?)</a></em>',
    re.M | re.I,
)


def _fetch_page(page_url):
    """Return the raw body of ``page_url``.

    The response is closed even when ``read()`` raises, so a failed transfer
    does not leak the connection.  Errors from ``urllib2`` (URLError, socket
    timeouts after 15 seconds) propagate to the caller unchanged.
    """
    request = urllib2.Request(page_url, headers=header)
    response = urllib2.urlopen(request, timeout=15)
    try:
        return response.read()
    finally:
        response.close()


# For every work-list entry: build the page URLs, fetch each page, and append
# "<name><item>," for every regex match to D:\down\loupan\test\<slug>.txt.
for l in LS:
    name = l[0]
    pn = l[1]
    url = l[2]

    # Last path component of the listing URL; tolerant of a missing trailing
    # slash (plain split('/')[-2] would return 'cm' in that case).
    txtpath = url.rstrip('/').split('/')[-1]

    # range() excludes int(pn), so pages 1 .. pn-1 are requested.
    # NOTE(review): if pn is meant to be the page count, the last page is
    # skipped -- confirm before changing the bound.
    urls = ['https://www.anjuke.com/chengdu/cm/'+txtpath+'/p{}'.format(i) for i in range(1,int(pn))]

    print(urls)

    # NOTE(review): the original obtained this number from a throw-away loop
    # over the entry's own fields, so it is always 2 for three-field entries.
    # The position of the entry in LS was probably intended; the value is
    # kept as-is so the printed output does not change.
    x = len(l) - 1
    print('L' + str(x) + ' = path + ' + "'" + txtpath + ".txt'")

    out_file = "D:\\down\\loupan\\test\\{}.txt".format(txtpath)

    for page_url in urls:
        data = _fetch_page(page_url)
        searchObj = _ITEM_RE.findall(data)

        # Skip pages without matches so no empty output file is created.
        if not searchObj:
            continue

        # Open the output once per page rather than once per match.
        with open(out_file, "a") as f:
            for item in searchObj:
                # Same per-item pause as before, so overall pacing is unchanged.
                time.sleep(0.1)
                f.write(name + str(item) + ",")



 0  已被阅读了35次  楼主 2018-10-12 00:17:28
回复列表

回复:全部楼盘

LOOK官方站 联系站长
Powered by Look博客