看世界
 
昨日:篇  今日:篇   总帖:篇   会员:
今日:0    总帖:57
admin
19
"""Download every .jpg image linked from a web page (Python 2 script).

The images are written to the current directory as 0.jpg, 1.jpg, ... in the
order their URLs appear in the page source.
"""
import re  # regular expressions, used to find the image URLs
import urllib2  # HTTP client (Python 2 only)
from contextlib import closing

PAGE_URL = 'https://www.nuomi.com/?cid=002540'

# Non-greedy and limited to URL characters: the previous pattern
# (http:.+\.jpg) was greedy, so several images on one line collapsed into a
# single bogus "URL", and https links were never matched.
IMAGE_URL_RE = re.compile(r'https?://[^\s"\'<>]+?\.jpg')

with closing(urllib2.urlopen(PAGE_URL)) as req:
    buf = req.read()

listurl = IMAGE_URL_RE.findall(buf)  # all image URLs found in the page
print(listurl)

for i, url in enumerate(listurl):
    # Fetch first, then open the output file, so a failed request does not
    # leave an empty <i>.jpg behind.
    with closing(urllib2.urlopen(url)) as req:
        buf = req.read()
    with open(str(i) + '.jpg', "wb") as f:
        f.write(buf)
0 0 27天前
admin
20
# coding:utf8
"""Download the .jpg images embedded in a web page (Python 2 script)."""
import urllib
import re

# Captures the URL of every src="....jpg" attribute that is immediately
# followed by a pic_ext attribute.
IMG_RE = re.compile(r'src="(.+?\.jpg)" pic_ext')


def getHtml(url):
    """Return the raw body of *url*, closing the connection afterwards."""
    page = urllib.urlopen(url)
    try:
        return page.read()
    finally:
        page.close()


def getImg(html):
    """Save each matched image to the z: drive and return the list of URLs.

    Images are stored as 0.jpg, 1.jpg, ... in match order.  The URL list is
    returned so the caller has something meaningful to print (the function
    used to return None).
    """
    imglist = IMG_RE.findall(html)
    for x, imgurl in enumerate(imglist):
        print(imgurl)
        # Save each picture locally.
        urllib.urlretrieve(imgurl, 'z:\\%s.jpg' % x)
    return imglist


if __name__ == "__main__":
    html = getHtml("http://tieba.baidu.com/p/2460150866")
    print(getImg(html))
0 0 27天前
admin
17
"""Download one image with urllib.urlretrieve and print progress (Python 2)."""
import urllib


def callbackfunc(blocknum, blocksize, totalsize):
    """Progress callback for urllib.urlretrieve.

    @blocknum: number of blocks transferred so far
    @blocksize: size of one block in bytes
    @totalsize: size of the remote file in bytes; may be -1 or 0 when the
        server does not report it
    """
    if totalsize <= 0:
        # Size unknown: a percentage cannot be computed (and dividing by 0
        # would raise), so report the byte count instead.
        print("%d bytes" % (blocknum * blocksize))
        return
    percent = 100.0 * blocknum * blocksize / totalsize
    if percent > 100:
        # The last block is usually only partly filled.
        percent = 100
    print("%.2f%%" % percent)


if __name__ == "__main__":
    url = 'https://n.sinaimg.cn/news/680/w300h380/20181022/2qG_-hmuuiyv6374339.jpg'
    local = 'z:\\333.jpg'
    # The original post had this call pasted twice back-to-back, which is a
    # syntax error; one download is enough.
    urllib.urlretrieve(url, local, callbackfunc)
0 0 27天前
admin
26
import os

path = r'z:\list\1'


def checkDocuments(path):
    """Create *path* (including missing parents) unless it already exists.

    The directory is created first and the "already exists" case is handled
    afterwards, so there is no window between an existence check and the
    creation.  As before, nothing happens when *path* already exists.

    Raises:
        OSError: if the path does not exist and could not be created.
    """
    try:
        os.makedirs(path)
    except OSError:
        if not os.path.exists(path):
            raise


if __name__ == "__main__":
    checkDocuments(path)
0 0 28天前
admin
28
<style type="text/css">
<!--
.foot1 {color: #ccc}
.user-foot a{font-family: "微软雅黑",Microsoft YaHei;color: #ccc}
.user-foot p{font-family: "微软雅黑",Microsoft YaHei;color: #ccc}
.user-foot a:link{color: #ccc}
.user-foot a:hover{color: #eee}
-->
</style>
<!-- Footer
================================================== -->
<footer class="bs-docs-footer user-foot visible-lg">
  <div class="container">
    <!-- NOTE(review): the two links with myarea=郫都二手房 and myarea=新津二手房 carry the
         "二手房" suffix while every other link passes the bare district name; confirm
         which value the list page expects. Left unchanged here. -->
    <ul class="bs-docs-footer-links user-foot-link">
      <li><a href="/e/action/ListInfo.php?classid=17&ph=1&myarea=青羊">青羊二手房</a></li>
      <li><a href="/e/action/ListInfo.php?classid=17&ph=1&myarea=锦江">锦江二手房</a></li>
      <li><a href="/e/action/ListInfo.php?classid=17&ph=1&myarea=金牛">金牛二手房</a></li>
      <li><a href="/e/action/ListInfo.php?classid=17&ph=1&myarea=武侯">武侯二手房</a></li>
      <li><a href="/e/action/ListInfo.php?classid=17&ph=1&myarea=成华">成华二手房</a></li>
      <li><a href="/e/action/ListInfo.php?classid=17&ph=1&myarea=高新区">高新区二手房</a></li>
      <li><a href="/e/action/ListInfo.php?classid=17&ph=1&myarea=天府新区">天府新区二手房</a></li>
      <li><a href="/e/action/ListInfo.php?classid=17&ph=1&myarea=温江">温江二手房</a></li>
      <li><a href="/e/action/ListInfo.php?classid=17&ph=1&myarea=龙泉驿">龙泉驿二手房</a></li>
      <li><a href="/e/action/ListInfo.php?classid=17&ph=1&myarea=双流">双流二手房</a></li>
      <li><a href="/e/action/ListInfo.php?classid=17&ph=1&myarea=都江堰">都江堰二手房</a></li>
      <li><a href="/e/action/ListInfo.php?classid=17&ph=1&myarea=郫都二手房">郫都二手房</a></li>
      <li><a href="/e/action/ListInfo.php?classid=17&ph=1&myarea=新都">新都二手房</a></li>
      <li><a href="/e/action/ListInfo.php?classid=17&ph=1&myarea=青白江">青白江二手房</a></li>
      <li><a href="/e/action/ListInfo.php?classid=17&ph=1&myarea=新津二手房">新津二手房</a></li>
      <li><a href="/e/action/ListInfo.php?classid=17&ph=1&myarea=金堂">金堂二手房</a></li>
      <li><a href="/e/action/ListInfo.php?classid=17&ph=1&myarea=彭州">彭州二手房</a></li>
    </ul>
    <p><a href="/" target="_blank" title="<?=$public_r[sitename]?>二手房"><?=$public_r[sitename]?>二手房</a>提供免费快捷的二手房交易信息 在线发布出售,求购,转让二手房</p>
    <p class="pull-right"><a href="#top">返回顶部</a></p>
  </div>
</footer>
0 0 29天前
admin
18
# -*- coding: utf-8 -*-
"""Generate second-hand-house listings and insert them into MySQL (Python 2).

For every district file in LS the script reads comma-separated entries of the
form "<district>·<estate>", builds one listing per entry with random
attributes, and writes it to the tables `phome_ecms_info1` and
`phome_ecms_info1_data_1`.  Ids continue from the current max(id) of
`phome_ecms_info1`.
"""
import MySQLdb
import re
import math
import random
from random import choice
import time
from faker import Faker
import os
import sys

# Python 2 only: the module must be reloaded before setdefaultencoding()
# becomes available again.
reload(sys)
sys.setdefaultencoding('utf-8')

fake = Faker("zh_CN")

# NOTE(review): credentials are hard-coded in source control; move them to
# configuration or the environment before sharing this script.
DB_HOST = "192.168.0.103"
DB_USER = "root"
DB_PASSWORD = "jBI5cAhj676BI5%"
DB_NAME = "cd_zgxd_net_bak"

path2 = "D:\\down\\fenlei\\chengdu\\"
DISTRICT_FILES = [
    'cdpujiangxian.txt', 'chenghua.txt', 'chongzhoushi.txt', 'dayixian.txt',
    'dujiangyan.txt', 'gaoxin.txt', 'jianyangsh.txt', 'jinniu.txt',
    'jintangxian.txt', 'Jjinjiang.txt', 'longquanyi.txt', 'pengzhoushi.txt',
    'piduqu.txt', 'qingbaijiangqu.txt', 'qingyang.txt', 'qionglaishi.txt',
    'shuangliu.txt', 'tainfuxinqu.txt', 'wenjiang.txt', 'wuhou.txt',
    'xindu.txt', 'xinjinxian.txt',
]
LS = [path2 + file_name for file_name in DISTRICT_FILES]

ClassId = 17

sales = ['出售', '急售', '诚意出售', '个人出售', '诚售']
subways = ['近地铁', '']
huxins = ['单间配套', '一室一厅', '二室一厅']      # layout
fbnxs = ['房本满二年', '房本满五年', '其他']        # age of the title deed
fwlxs = ['公寓', '普通住宅', '商住楼', '商品房']    # property type
fwcxs = ['东', '南', '西', '北']                    # orientation
zxcds = ['毛坯', '简装', '精装']                    # decoration level
wyzfs = ['是', '否']                                # only home

# Values are passed separately to cursor.execute() so the driver does the
# quoting; building the statement with "%" broke on names containing quotes.
INSERT_INFO_SQL = (
    "insert into `phome_ecms_info1` (`id`, `classid`, `onclick`, `newspath`,"
    "`keyboard`, `keyid`, `userid`,`username`, `checked`,`truetime`,"
    "`filename`, `title`,`newstime`,`titlepic`,`closepl`, `havehtml`, "
    "`smalltext`,`myarea`, `stb`,`copyids`, `infotags`,`ispic`, `subway`, "
    "`huxin`,`space`,`price`,`loupan`,`dj`) values (%s,%s,%s,%s,%s, %s, %s, "
    "%s, '1', %s, %s, %s, %s,%s,'0','1', %s,%s, '1', '0', %s,'1', %s, %s,%s, "
    "%s,%s,%s);"
)
INSERT_DATA_SQL = (
    "insert into `phome_ecms_info1_data_1` (`id`, `classid`, `email`,"
    "`mycontact`,`address`,`fbnx`,`fwlx`,`fwcx`,`zxcd`,`wyzf`) values "
    "( %s, %s, %s,%s,%s,%s,%s,%s,%s,%s);"
)


def _classify(ad):
    """Return 'villa', 'mansion' or 'other' depending on the estate name."""
    if re.search('(墅)', ad):
        return 'villa'
    if re.search('(公馆|宿舍)', ad):
        return 'mansion'
    return 'other'


def _build_listing(ad):
    """Return a dict with the generated field values for entry *ad*.

    NOTE(review): the original computed category-specific layout, area and
    price ranges and then overwrote them with the generic values used below,
    so the generic values are what was actually stored.  Confirm whether the
    category-specific ranges were intended.
    """
    category = _classify(ad)
    myarea = ad.split('·')[0]
    loupan = ad.split('·')[-1]

    sale = choice(sales)
    huxin = choice(huxins)
    space = random.randint(45, 85)             # area
    price = random.randint(68, 121)            # total price
    dj = round(price / float(space), 2)        # price per unit of area
    zxcd = choice(zxcds)
    fwcx = choice(fwcxs)

    if category == 'villa':
        fwlx = '别墅'
        titles = [sale + fwlx, sale + loupan, loupan + sale]
        blurb = '超低容积率,品质铸造,赠送面积大,刚改首选,'
        titlepic = ('http://www.pictutu.com/img/house/bs/'
                    + str(random.randint(1, 1000)) + '.jpg')
    elif category == 'mansion':
        fwlx = choice(fwlxs)
        titles = [sale + huxin, sale + loupan, loupan + sale]
        blurb = '交通便利,成熟地段,商场,银行多,总价低,'
        titlepic = ('http://www.pictutu.com/img/house/'
                    + str(random.randint(1, 5000)) + '.jpg')
    else:
        fwlx = choice(fwlxs)
        titles = [
            sale + huxin,
            sale + loupan,
            loupan + sale,
            loupan + huxin + sale,
            huxin + ' ' + loupan + sale,
            sale + loupan + huxin,
            sale + ad + ' ' + huxin,
        ]
        blurb = '交通便利,成熟地段,商场,银行多,总价低,可按揭,首次置业首选,'
        titlepic = ('http://www.pictutu.com/img/house/'
                    + str(random.randint(1, 5000)) + '.jpg')

    title = choice(titles)
    return {
        'onclick': random.randint(16858, 39899),
        'username': fake.last_name().encode('utf-8') + choice(['先生', '女士']),
        'title': title,
        'titlepic': titlepic,
        'smalltext': title + blurb + zxcd + ',' + '房屋朝向:' + fwcx,
        'myarea': myarea,
        'loupan': loupan,
        'subway': choice(subways),
        'huxin': huxin,
        'space': str(space),
        'price': str(price) + '万元',
        'dj': str(dj),
        'email': fake.email(),
        'mycontact': str(fake.phone_number()),
        'fbnx': choice(fbnxs),
        'fwlx': fwlx,
        'fwcx': fwcx,
        'zxcd': zxcd,
        'wyzf': choice(wyzfs),
    }


def _insert_listing(cursor, g_id, item):
    """Insert *item* under id *g_id* into both tables."""
    now = int(time.time())
    cursor.execute(INSERT_INFO_SQL, (
        g_id, ClassId, item['onclick'],
        time.strftime("%Y-%m-%d", time.localtime()),
        item['fwlx'], '', '0', item['username'], now, g_id, item['title'],
        now, item['titlepic'], item['smalltext'], item['myarea'],
        item['fwlx'], item['subway'], item['huxin'], item['space'],
        item['price'], item['loupan'], item['dj'],
    ))
    # The `address` column receives the estate name, as before.
    cursor.execute(INSERT_DATA_SQL, (
        g_id, ClassId, item['email'], item['mycontact'], item['loupan'],
        item['fbnx'], item['fwlx'], item['fwcx'], item['zxcd'], item['wyzf'],
    ))


def _read_entries(file_path):
    """Yield, per line of *file_path*, the list of its non-empty entries.

    Entries are separated by ','.  Blank entries (the files end with a
    trailing comma) are dropped; they used to be inserted as empty listings.
    """
    with open(file_path) as handle:
        for line in handle:
            entries = [entry.strip() for entry in line.split(',')]
            yield [entry for entry in entries if entry]


def main():
    """Process every district file in LS, committing after each file."""
    db = MySQLdb.connect(DB_HOST, DB_USER, DB_PASSWORD, DB_NAME, charset="utf8")
    print(db)
    cursor = db.cursor()
    total = 0
    try:
        for l in LS:
            cursor.execute("select max(id) from `phome_ecms_info1`")
            id_info = cursor.fetchone()
            print(id_info)
            g_id = id_info[0] if id_info[0] is not None else 0
            total = g_id
            for entries in _read_entries(l):
                print('max:' + str(len(entries)))
                total = len(entries) + g_id
                print("total:" + str(total))
                for ad in entries:
                    g_id += 1
                    _insert_listing(cursor, g_id, _build_listing(ad))
            # MySQLdb does not autocommit; without this the rows are lost on
            # transactional storage engines when the connection closes.
            db.commit()
    except Exception:
        db.rollback()
        print("insert error")
        raise
    finally:
        db.close()
    print('当前已经完成的ID:' + str(total))
    print("当前进程工作目录 : %s" % os.getcwd())


if __name__ == "__main__":
    main()

# Originally run with:
# "D:\Program Files (x86)\Anaconda2\python.exe" "D:/Program Files/JetBrains/PyCharm 2017.3.3/helpers/pydev/二手房出售_价格面积分类.py"
0 0 29天前
admin
25
# -*- coding: UTF-8 -*-
"""Crawl community names from anjuke.com list pages (Python 2 script).

For every [district name, page count, list URL] row in LS the script fetches
the paginated list pages and appends "<district name>·<community>," to
D:/down/fenlei/<city>/<district slug>.txt.
"""
import os
import math
import urllib2
import urllib
import re
import time
import random
import sys
sys.path.append("../")
import MySQLdb
from contextlib import closing
from ftplib import FTP
import shutil
import threading
from random import choice

path = "D:\\down\\loupan\\chongqing\\"  # file path
LS = [['九龙坡', '10', 'https://www.anjuke.com/chongqing/cm/jiulongpo/']]

header = {
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
    'Accept-Language': 'zh-CN,zh;q=0.8',
    'Cache-Control': 'no-cache',
    'User-Agent': 'Mozilla/5.0(Windows NT 6.1;WOW64) AppleWebKit /537.36(KHTML, like Gecko) Chrome/70.0.3510.2 Safari/537.36'
}


def _crawl_district(name, pn, list_url):
    """Fetch the list pages of one district and append the names found.

    Returns the path of the text file that was appended to.
    """
    print(pn)
    print('name:' + name)
    txtpath = list_url.split('/')[-2]
    city = list_url.split('/')[-4]
    print(txtpath)
    # NOTE(review): range(1, int(pn)) stops at page pn - 1.  If pn is the
    # number of pages this skips the last one - confirm the intent.
    urls = ['https://www.anjuke.com/' + city + '/cm/' + txtpath + '/p{}'.format(i)
            for i in range(1, int(pn))]
    print(urls)
    time.sleep(1)

    out_dir = "D:\\down\\fenlei\\" + city
    out_file = out_dir + "\\{}.txt".format(txtpath)
    if not os.path.isdir(out_dir):
        # open(..., "a") does not create missing directories.
        os.makedirs(out_dir)

    # The city comes from the URL, so it is escaped before being placed in
    # the pattern.
    pattern = re.compile(
        r'/' + re.escape(city) + r'/cm\d.*?/"\n\s*target="_blank">(.*?)</a></em>',
        re.M | re.I)

    for url in urls:
        request = urllib2.Request(url, headers=header)
        with closing(urllib2.urlopen(request, timeout=15)) as response:
            data = response.read()
        # One open per page instead of one per community name.
        with open(out_file, "a") as f:
            for i in pattern.findall(data):
                sys.stdout.write(name + '·' + i + '; ')
                time.sleep(1.1)  # pause kept exactly as in the original
                f.write(name + '·' + str(i) + ",")
    return out_file


def main():
    """Crawl every district listed in LS."""
    for l in LS:
        out_file = _crawl_district(l[0], l[1], l[2])
        # The original printed the literal text '"+city+"\\{}.txt' because of
        # mismatched quotes; print the real output path instead.
        print(out_file)


if __name__ == "__main__":
    main()

# Originally run with:
# "D:\Program Files (x86)\Anaconda2\python.exe" "D:/Program Files/JetBrains/PyCharm 2017.3.3/helpers/pydev/爬楼批量1012.py"
0 0 29天前
admin
16
# -*- coding: utf-8 -*-
"""Copy .jpg images into one of three folders according to width / height."""
import os
import os.path
import shutil

list1 = []

# Source folder that is scanned recursively.
path = 'z:\\100'

# Destination folders: ratio > 1.25, 0.9 < ratio <= 1.25, everything else.
WIDE_DIR = r'D:\web\PHPnow-1.5.6\vhosts\img.pictutu.com\img\list\list1'
SQUARE_DIR = r'D:\web\PHPnow-1.5.6\vhosts\img.pictutu.com\img\list\list2'
TALL_DIR = r'D:\web\PHPnow-1.5.6\vhosts\img.pictutu.com\img\list\list3'


def get_py(path, list1):
    """Append the path of every .jpg file below *path* to *list1*.

    Sub-directories are searched recursively.  The extension is compared
    case-insensitively, so ".JPG" files are found as well.  *list1* is
    modified in place and also returned.
    """
    for filename in os.listdir(path):
        pathTmp = os.path.join(path, filename)
        if os.path.isdir(pathTmp):
            get_py(pathTmp, list1)
        elif filename.lower().endswith('.jpg'):
            list1.append(pathTmp)
    return list1


def _target_dir(ratio):
    """Return the destination folder for an image with width/height *ratio*."""
    if ratio > 1.25:
        return WIDE_DIR
    if 0.9 < ratio <= 1.25:
        return SQUARE_DIR
    return TALL_DIR


def main():
    """Scan *path* and copy every image into the folder for its ratio."""
    # Imported here so get_py() stays usable without the imaging library.
    from PIL import Image

    get_py(path, list1)
    print('在%s目录及其子目录下找到%d个jpg文件,分别为:' % (path, len(list1)))

    for directory in (WIDE_DIR, SQUARE_DIR, TALL_DIR):
        # shutil.copy() onto a missing folder would create a single file with
        # that name, overwritten by each subsequent copy.
        if not os.path.isdir(directory):
            os.makedirs(directory)

    for filename in list1:
        # Only the size is needed; the file is closed again immediately.
        with open(filename, 'rb') as handle:
            size = Image.open(handle).size
        w = float(size[0])
        h = float(size[1])
        print(w)
        print(h)
        b = w / h
        if b > 1.25:
            print(b)
        shutil.copy(filename, _target_dir(b))


if __name__ == "__main__":
    main()
0 0 30天前
admin
23
# -*- coding: utf-8 -*-
"""Rename the JPEG files of a folder to 1.jpg, 2.jpg, 3.jpg, ..."""
__author__ = 'Administrator'
import os
import re
import sys
import uuid

path = r'D:\web\PHPnow-1.5.6\vhosts\img.pictutu.com\img\1'
newpath = r'D:\web\PHPnow-1.5.6\vhosts\img.pictutu.com\img\new'

# File names that end in .JPG or .jpg.
_JPG_RE = re.compile(r".+\.(JPG|jpg)$")


def renameall(directory=None):
    """Rename every .jpg/.JPG file in *directory* to a running number.

    directory: folder to process; defaults to the module-level ``path``.

    Files are numbered from 1 in sorted name order.  Entries that are not
    JPEG files are left alone (the match result used to be ignored, so
    everything in the folder was renamed to N.jpg).  Renaming goes through
    unique intermediate names, so a target such as "1.jpg" that is already
    taken by another file of the batch is neither overwritten nor causes an
    error.  The working directory of the process is not changed.
    """
    if directory is None:
        directory = path
    fileList = os.listdir(directory)  # folder whose files get renamed
    print("修改前:" + str(fileList))   # show the folder content before renaming

    targets = sorted(
        name for name in fileList
        if _JPG_RE.match(name) and os.path.isfile(os.path.join(directory, name))
    )

    # Pass 1: move every file to a name that cannot clash with N.jpg.
    token = uuid.uuid4().hex
    staged = []
    for index, fileName in enumerate(targets, 1):
        temporary = os.path.join(directory, "%s_%d.tmp" % (token, index))
        os.rename(os.path.join(directory, fileName), temporary)
        staged.append(temporary)

    # Pass 2: give the files their final numbered names.
    for num, temporary in enumerate(staged, 1):
        os.rename(temporary, os.path.join(directory, str(num) + '.' + 'jpg'))

    print("---------------------------------------------------")
    sys.stdout.flush()


if __name__ == "__main__":
    renameall()

# Originally run with:
# "D:\Program Files (x86)\Anaconda2\python.exe" C:/Users/Administrator/PycharmProjects/RE_NAME_P3.py
# Author's notes: 1. change to the current folder; 2. an intermediate name
# may be needed as a transit step (now done by the two-pass rename above).
0 0 30天前
admin
26
[Image]
0 0 30天前
快速发帖 高级模式
LOOK官方站 联系站长
Powered by Look博客