看世界
 
昨日:篇  今日:篇   总帖:篇   会员:
admin
创始人Lv1   
小区楼盘     
#!/usr/bin/python
# -*- coding: UTF-8 -*-

#2018年4月20日16:52:59 完成
#21zgxdnet
import os
import math
import urllib2
import urllib
import re
import time
import random
import sys
sys.path.append("../")
import MySQLdb
from ftplib import FTP
import shutil
import threading
import random
from random import choice


# urls = ['https://www.anjuke.com/shanghai/cm/putuo/p{}'.format(str(i)) for i in range(1,11)]


# urls = ['https://www.anjuke.com/chengdu/cm/wuhou/p{}'.format(str(i)) for i in range(1,15)]

# Listing pages p1..p7 of the chengdu/cm/gaoxin index; range(1, 8) stops
# before 8.
urls = [
    'https://www.anjuke.com/chengdu/cm/gaoxin/p{}'.format(page_no)
    for page_no in range(1, 8)
]




# https://www.anjuke.com/shanghai/cm/pudong/p13

# https://www.anjuke.com/chengdu/cm/qingyang/p13/


# urls = ['https://www.yrw.com/products/list-direct-all-performance-1-createTimeDesc-{}.html'.format(str(i)) for i in range(1,11)]



# urls = ['https://www.yrw.com/products/list-direct-all-performance-1-createTimeDesc-{}.html'.format(str(i)) for i in range(1,11)]

# HTTP headers attached to every page request below. The User-Agent value is
# a desktop Chrome-on-macOS browser string.
header = {
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
    'Accept-Language': 'zh-CN,zh;q=0.8',
    'Cache-Control': 'no-cache',
    'User-Agent': (
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_1) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/54.0.2840.98 Safari/537.36'
    ),
}

for url in urls:
    request = urllib2.Request(url,headers=header)
    response = urllib2.urlopen(request, timeout=15)
    data = response.read()
    response.close()
    # searchObj = re.findall(r'/shanghai/cm\d.*?/"\n\s*target="_blank">(.*?)</a></em>', data, re.M|re.I)
    searchObj = re.findall(r'/chengdu/cm\d.*?/"\n\s*target="_blank">(.*?)</a></em>', data, re.M|re.I)
    for i in searchObj:
        print i +';',
        time.sleep(0.1)
        with open("D:\\down\\loupan\\chengdu\\gaoxin.txt", "a") as f:
            f.write("高新区" +"'" + str(i) +"'," )



pass


 0  已被阅读了29次  楼主 2018-10-09 22:25:35
回复列表

回复:小区楼盘

LOOK官方站 联系站长
Powered by Look博客