看世界
 
admin
27
# -*- coding: utf-8 -*-
import os
import os.path
import shutil
from PIL import Image

list1 = []

def get_py(path, list1):
    fileList = os.listdir(path)          # every entry under path
    for filename in fileList:
        pathTmp = os.path.join(path, filename)   # join path with filename
        if os.path.isdir(pathTmp):       # if it is a directory,
            get_py(pathTmp, list1)       # recurse into it
        elif filename[-4:] == '.jpg':    # otherwise match on the extension
            list1.append(pathTmp)

path = 'z:\\100'
# path = input('Enter a path: ').strip()
get_py(path, list1)
print('Under %s and its subdirectories, found %d jpg files:' % (path, len(list1)))
for filename in list1:
    print(filename)
    img = Image.open(filename)
    w = img.size[0]
    h = img.size[1]
    if w > h:          # landscape
        path1 = "z:\\new\\1\\"
        shutil.copy(filename, path1)
    elif w == h:       # square
        path2 = "z:\\new\\2\\"
        shutil.copy(filename, path2)
    else:              # portrait
        path3 = "z:\\new\\3\\"
        shutil.copy(filename, path3)
        print('type3: ' + str(len(filename)))
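For comparison, a minimal Python 3 sketch of the same aspect-ratio sort, assuming Pillow is installed and the three target folders already exist; pathlib's rglob replaces the hand-written recursion:

# Python 3 sketch: classify every .jpg under a tree by aspect ratio.
import shutil
from pathlib import Path
from PIL import Image

targets = {'landscape': 'z:/new/1', 'square': 'z:/new/2', 'portrait': 'z:/new/3'}
for p in Path('z:/100').rglob('*.jpg'):       # recursive glob replaces get_py()
    w, h = Image.open(str(p)).size
    kind = 'landscape' if w > h else ('square' if w == h else 'portrait')
    shutil.copy(str(p), targets[kind])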
IT 0 0 52 days ago
admin
37
# coding: utf8
import shutil
from PIL import Image

filename = 2
src = "z:\\{}.gif".format(filename)   # one source file; the original mixed .gif/.jpg/.PNG paths
img = Image.open(src)
w = img.size[0]
h = img.size[1]
if w > h:
    print("横型图片")     # landscape image
    path1 = "z:\\new\\1\\"
    shutil.copy(src, path1)
elif w == h:
    print('正方形图片')   # square image
    path2 = "z:\\new\\2\\"
    shutil.copy(src, path2)
else:
    print("竖型图片")     # portrait image
    path3 = "z:\\new\\3\\"
    shutil.copy(src, path3)
IT 0 0 52 days ago
admin
108
IP proxy
# -*- coding: UTF-8 -*-
import requests

# proxies = {"http": "http://125.70.13.77:8080"}
# proxies = {"http": "http://122.235.175.128:8118"}
# proxies = {"http": "http://120.77.249.46:8080"}
proxies = {"http": "http://61.135.217.7:80"}   # 61.135.217.7, port 80
res = requests.get("http://www.882667.com/", proxies=proxies)
# res = requests.get("http://www.baidu.com", proxies=proxies)
print(res)
print(res.text)
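Free proxies die often, so a hedged variant adds a timeout and catches connection failures; same proxy and target URL as above:

# Sketch: the same request with a timeout and failure handling.
import requests

proxies = {"http": "http://61.135.217.7:80"}
try:
    res = requests.get("http://www.882667.com/", proxies=proxies, timeout=10)
    print(res.status_code)
    print(res.text[:200])    # first 200 characters only
except requests.exceptions.RequestException as e:
    print("proxy request failed: %s" % e)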
IT 0 0 53 days ago
admin
23
# -*- coding: UTF-8 -*-
import os

city = 'cd'
path = "z:\\down\\test11\\" + city
txtpath = 'filename'
if not os.path.exists(path):
    print('Directory does not exist')
    os.makedirs(path.decode('utf-8'))   # Python 2: decode the path before makedirs
    print('Folder: ' + path)
    print('Current working directory: %s' % os.getcwd())
else:
    print('Directory already exists....')
    print('Folder: ' + path)
    print('Current working directory: %s' % os.getcwd())
# append '456' to <path>\filename.txt
with open(path + "\\{}.txt".format(txtpath), "a") as f:
    f.write('456')

# Sample output:
# Directory already exists....
# Folder: z:\down\test11\cd
# Current working directory: C:\Users\Administrator
# and '456' is appended to filename.txt
IT 0 0 53 days ago
admin
17
# -*- coding: utf-8 -*-
import os

# path = 'Z:\\news\\001'
# if not os.path.exists(path):
#     os.makedirs(path)

path = 'Z:\\news\\news3'
if not os.path.exists(path):
    print('Directory does not exist')
    os.makedirs(path.decode('utf-8'))   # Python 2: decode the path before makedirs
    print(path + ' created successfully!')
else:
    print('Directory already exists....')
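On Python 3.2+ the check-then-create dance collapses into a single call; a minimal sketch:

import os

path = 'Z:\\news\\news3'
os.makedirs(path, exist_ok=True)   # creates the whole tree; no error if it already exists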
IT 0 0 53 days ago
admin
15
['青羊','14','https://www.anjuke.com/chengdu/cm/qingyang/'],
['锦江','11','https://www.anjuke.com/chengdu/cm/jinjiang/'],
['金牛','11','https://www.anjuke.com/chengdu/cm/jinniu/'],
['武侯','15','https://www.anjuke.com/chengdu/cm/wuhou/'],
['成华','12','https://www.anjuke.com/chengdu/cm/chenghua/'],
['高新区','9','https://www.anjuke.com/chengdu/cm/gaoxin/'],
['天府新区','5','https://www.anjuke.com/chengdu/cm/tainfuxinqu/'],
['蒲江','2','www.anjuke.com/chengdu/cm/cdpujiangxian/'],
['温江','7','https://www.anjuke.com/chengdu/cm/wenjiang/'],
['龙泉驿','7','https://www.anjuke.com/chengdu/cm/longquanyi/'],
['双流','7','https://www.anjuke.com/chengdu/cm/shuangliu/'],
['都江堰','7','https://www.anjuke.com/chengdu/cm/dujiangyan/'],
['郫都','9','https://www.anjuke.com/chengdu/cm/piduqu/'],
['新都','8','https://www.anjuke.com/chengdu/cm/xindu/'],
['青白江','3','https://www.anjuke.com/chengdu/cm/qingbaijiangqu/'],
['新津','3','https://www.anjuke.com/chengdu/cm/xinjinxian/'],
['金堂','4','https://www.anjuke.com/chengdu/cm/jintangxian/'],
['彭州','3','https://www.anjuke.com/chengdu/cm/pengzhoushi/'],
['崇州','4','https://www.anjuke.com/chengdu/cm/chongzhoushi/'],
['大邑','3','https://www.anjuke.com/chengdu/cm/dayixian/'],
['简阳','3','https://www.anjuke.com/chengdu/cm/jianyangsh/'],
['邛崃','3','https://www.anjuke.com/chengdu/cm/qionglaishi/']
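Each entry is [district name, page count, listing URL]; the batch scraper in a later post expands the page count into /p1 ... /pN page URLs. A minimal sketch of that expansion (note the batch script's range(1, int(pn)) skips the last page; this version includes it):

# Sketch: expand one [name, page count, url] entry into its paginated URLs.
entry = ['青羊', '14', 'https://www.anjuke.com/chengdu/cm/qingyang/']
name, pn, url = entry
urls = [url + 'p{}'.format(i) for i in range(1, int(pn) + 1)]
print(urls[0])    # https://www.anjuke.com/chengdu/cm/qingyang/p1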
IT 0 0 53 days ago
admin
32
DELETE FROM table_name   -- with no WHERE clause this removes every row in the table
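For a selective delete via MySQLdb (the driver used in the other posts here), a parameterized sketch; the connection settings and the id column/value are illustrative, not from the original:

# Sketch: delete only matching rows, letting the driver escape the value.
import MySQLdb

db = MySQLdb.connect(host="127.0.0.1", user="root", passwd="***", db="data", charset="utf8")
cursor = db.cursor()
cursor.execute("DELETE FROM table_name WHERE id = %s", (42,))   # hypothetical column and value
db.commit()
db.close()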
IT 0 0 58 days ago
admin
71
## Step 1: grab each district's name, page count, and URL

# -*- coding: utf-8 -*-
import re
from bs4 import BeautifulSoup
import urllib2

url = "https://www.anjuke.com/shanghai/cm/"
header = {
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
    'Accept-Language': 'zh-CN,zh;q=0.8',
    'Cache-Control': 'no-cache',
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.98 Safari/537.36'
}
request = urllib2.Request(url, headers=header)
response = urllib2.urlopen(request, timeout=15)
data = response.read()
soup = BeautifulSoup(data, "html.parser")
# print soup.title.string
list = soup.find_all(class_="P2a")
# district links look like https://www.anjuke.com/shanghai/cm/<name>/
listurls = soup.find_all('a', href=re.compile('https://www.anjuke.com/shanghai/cm/[a-zA-Z]{4,15}/'))
for url in listurls:
    if not re.search('(.*zhoubian|p(\d+))', url['href']):   # skip "nearby" and paging links
        urls = url['href']
        name = url.get_text()
        request = urllib2.Request(urls, headers=header)
        response = urllib2.urlopen(request, timeout=15)
        pn = response.read()
        soup = BeautifulSoup(pn, "html.parser")
        pn = soup.find(class_="P4").get_text()   # pagination block
        pn = pn.encode('utf-8')
        pn = re.sub('上一页|下一页', '', pn)        # strip the "prev page" / "next page" labels
        pn = re.sub('\s', '', pn)
        print(pn.split())
        with open("D:\\down\\loupan\\shanghai\\shanghai-1012.txt", "a") as f:
            f.write("['" + name.encode("utf8") + "','" + pn + "','" + url['href'].encode("utf8") + "'],")

## Step 2: fetch the individual community names

# -*- coding: UTF-8 -*-
import re
import time
import urllib2

path = "D:\\down\\loupan\\chengdu\\"
# earlier Chengdu runs:
# LS = [['大邑','3','www.anjuke.com/chengdu/cm/dayixian/'],['蒲江','2','www.anjuke.com/chengdu/cm/cdpujiangxian/']]
# LS = [...]   # the full Chengdu district list from the post above
# LS = [['简阳','3','https://www.anjuke.com/chengdu/cm/jianyangsh/'],['邛崃','3','https://www.anjuke.com/chengdu/cm/qionglaishi/']]
LS = [
    ['闵行','16','https://www.anjuke.com/shanghai/cm/minhang/'],
    ['宝山','12','https://www.anjuke.com/shanghai/cm/baoshan/'],
    ['徐汇','16','https://www.anjuke.com/shanghai/cm/xuhui/'],
    ['松江','11','https://www.anjuke.com/shanghai/cm/songjiang/'],
    ['嘉定','11','https://www.anjuke.com/shanghai/cm/jiading/'],
    ['浦东','34','https://www.anjuke.com/shanghai/cm/pudong/'],
    ['嘉定','17','https://www.anjuke.com/shanghai/cm/jiading/'],
    ['静安','17','https://www.anjuke.com/shanghai/cm/jingan/'],
    ['普陀','11','https://www.anjuke.com/shanghai/cm/putuo/'],
    ['杨浦','13','https://www.anjuke.com/shanghai/cm/yangpu/'],
    ['虹口','12','https://www.anjuke.com/shanghai/cm/hongkou/'],
    ['长宁','12','https://www.anjuke.com/shanghai/cm/changning/'],
    ['黄浦','13','https://www.anjuke.com/shanghai/cm/huangpu/'],
    ['青浦','9','https://www.anjuke.com/shanghai/cm/qingpu/'],
    ['奉贤','7','https://www.anjuke.com/shanghai/cm/fengxian/'],
    ['金山','5','https://www.anjuke.com/shanghai/cm/jinshan/'],
    ['崇明','3','https://www.anjuke.com/shanghai/cm/chongming/']
]
for l in LS:
    name = l[0]
    pn = l[1]
    print(pn)
    url = l[2]
    txtpath = url.split('/')[-2]
    urls = ['https://www.anjuke.com/shanghai/cm/' + txtpath + '/p{}'.format(i) for i in range(1, int(pn))]
    print(urls)
    time.sleep(6)
    header = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Accept-Language': 'zh-CN,zh;q=0.8',
        'Cache-Control': 'no-cache',
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3510.2 Safari/537.36'
    }
    for url in urls:
        request = urllib2.Request(url, headers=header)
        response = urllib2.urlopen(request, timeout=15)
        data = response.read()
        response.close()
        # community names sit in /shanghai/cm<digits>.../ anchors
        searchObj = re.findall(r'/shanghai/cm\d.*?/"\n\s*target="_blank">(.*?)</a></em>', data, re.M | re.I)
        # searchObj = re.findall(r'/chengdu/cm\d.*?/"\n\s*target="_blank">(.*?)</a></em>', data, re.M | re.I)
        for i in searchObj:
            print i + ';',
            time.sleep(0.7)
            with open("D:\\down\\loupan\\shanghai\\{}.txt".format(txtpath), "a") as f:
                f.write(name + str(i) + ",")

# Sample run:
# "D:\Program Files (x86)\Anaconda2\python.exe" "D:/Program Files/JetBrains/PyCharm 2017.3.3/helpers/pydev/爬楼批量1012.py"
# 34
# 民乐城秀园西苑; 三林苑; 宣桥枫庭; 仁恒滨江园(三期);

## Print a folder's file count and file names

# -*- coding: utf-8 -*-
__author__ = 'Administrator'
# import os
# path = os.getcwd()                        # current path
# count = 0
# for root, dirs, files in os.walk(path):   # walk the tree
#     for each in files:
#         count += 1                        # number of files under the folder
# print count                               # result

# count the entries under a directory
import os

path = "D:\\down\\loupan\\1012"
count = 0
for fn in os.listdir(path):   # fn is the file name
    count = count + 1
    print("L" + str(count) + " = path + '" + fn + "'")
# print(count)
for i in range(1, count + 1):
    print 'L' + str(i) + ',',

## Publish to the database

# -*- coding: utf-8 -*-
import re
import random
from random import choice
import time
import MySQLdb
from faker import Faker

fake = Faker("zh_CN")
# connect to the MySQL database
# db = MySQLdb.connect("127.0.0.1", "root", "*****%", "data", charset="utf8")
db = MySQLdb.connect(host="cd.zgxd.net", port=3306, user="cd_zgxd_net", passwd="*****BI5%", db="cd_zgxd_net", charset="utf8")
cursor = db.cursor()
# file = 'D:\down\shanghai\Jjingan.txt'
# file = 'D:\down\shanghai\TCD.txt'
# file = 'D:\down\P\PChengdu\T.txt'
# file = 'D:\\down\\loupan\\chengdu\\TCD.txt'
path = "D:\\down\\loupan\\1012\\"
L1 = path + 'cdpujiangxian.txt'
L2 = path + 'chongzhoushi.txt'
L3 = path + 'dujiangyan.txt'
L4 = path + 'jianyangsh.txt'
L5 = path + 'jintangxian.txt'
L6 = path + 'longquanyi.txt'
L7 = path + 'pengzhoushi.txt'
L8 = path + 'piduqu.txt'
L9 = path + 'qingbaijiangqu.txt'
L10 = path + 'qionglaishi.txt'
L11 = path + 'shuangliu.txt'
L12 = path + 'wenjiang.txt'
L13 = path + 'xindu.txt'
L14 = path + 'xinjinxian.txt'
LS = [L1, L2, L3, L4, L5, L6, L7, L8, L9, L10, L11, L12, L13, L14]
for l in LS:
    # word pools for the generated listings (repeats act as rough weights)
    sale = ['销售', '出售', '低转', '转让', '出售', '低售', '急售', '诚意出售', '亏本出售', '亏转', '急转', '个人转让', '个人出售', '诚售']
    pp = ['REALSTAR', '绿奥', '洁丰', '奥维斯特', '玫瑰园', '伊斯曼', '布兰妮', '正章', '赛维', '航星', 'CAS', '卡柏', '衣之恋', '衣适家', '灰姑娘', '德奈福', '伊卡诺', '洁神', '约瑟芬', '依莱尔', '净衣馆', '朵拉', '阿玛尼', '象王', '万星', '雄狮', '福奈特', '澳洁', '泰洁', '伊尔萨', '玛丽阿姨', '蕾奇尔', '尤萨', '威特斯', '施柏丽', '康洁', '美涤', '格利特', '执燊', '石油', '绿奥', '洁丰', '奥维斯特', '玫瑰园', '伊斯曼', '布兰妮', '多美依', '正章', '赛维', 'CAS', '卡柏', '衣之恋', '衣适家', '灰姑娘', '德奈福', '伊卡诺', '洁神', '约瑟芬', '喜兰妮', '净衣馆', '朵拉', '阿玛尼', '象王', '美一天', '涤派', '福奈特', '澳洁', '泰洁', '玛丽阿姨', '蕾奇尔', '尤萨', '威特斯', '伊尔萨', '澳洁', '泰洁', '施柏丽', '航星', '绿奥', '洁丰', '奥维斯特', '玫瑰园', '伊斯曼', '布兰妮', '正章', '赛维', 'CAS', 'UCC', '卡柏', '衣之恋', '衣适家', '灰姑娘', '德奈福', '伊卡诺', '洁神', '约瑟芬', '澳贝森', '净衣馆', '阿玛尼', '朵拉', '象王', '雪芙莱', '凯瑟琳', '鼎好', '天天新', '皇家圣雪', '贝朗', '优力美', '维特妮', '汉洁']
    sb = ['干洗机', '水洗机', '干洗机', '干洗设备', '干洗店设备', '水洗设备', '干洗店设备', '洗涤设备', '宾馆洗衣房设备', '洗衣房设备', '烘干机', '烘干设备']
    cs = ['八成新', '九成新', '全新', '', '九五新', '九九新', '八五新', '']
    JG = ['¥9000元', '¥8000元', '¥7000元', '¥6000元', '¥5000元', '¥4000元', '¥3000元', '¥8500元', '¥7500元', '¥6500元', '¥5500元', '¥4500元', '¥3500元', '面议', '电议']
    zs = [',赠送', ',另有', ',送', ',加送', ',免费送']
    other = ['熨烫设备', '缝纫机', '消毒柜', '服装传输线', '干洗材料', '水洗材料', '四氯乙烯', '干洗耗材']
    rl = ['8公斤', '10公斤', '12公斤', '15公斤', '20公斤', '25公斤', '30公斤', '10公斤', '50公斤']
    sql = "select max(id) from `phome_ecms_news`"
    cursor.execute(sql)
    id_info = cursor.fetchone()
    ClassId = '13'
    # QQ = '527573696'
    Writer = "上海干洗"
    Befrom = "zgxd.net"
    Company = "上海干洗加盟"
    # Phone = '13983000191'
    dq = '成都市'
    print(id_info)
    if id_info[0] != None:
        g_id = id_info[0]
    else:
        g_id = 0
    for line in open(l):
        lines = line.split(',')
        pattern = re.compile(r',')        # count the commas
        result1 = pattern.findall(line)
        max = len(result1) + 1
        print('max:' + str(max))
        total = max + g_id
        print("total:" + str(total))
        n = 1
        for ad in lines:
            g_id = g_id + 1
            dianji_i = random.randint(16858, 39899)
            titlepic_i = random.randint(1, 265)
            sa = choice(sale)
            pp1 = choice(pp)
            sb1 = choice(sb)
            cs1 = choice(cs)
            zs1 = choice(zs)
            rl1 = choice(rl)
            other1 = choice(other)
            man = fake.last_name().encode('utf-8') + '先生'    # male contact name
            woman = fake.last_name().encode('utf-8') + '女士'  # female contact name
            QQ = random.randint(56858, 899989988)
            name = [man, woman]
            name1 = choice(name)
            data = str(fake.date_between(start_date="-2y", end_date="now"))
            phone = str(fake.phone_number())
            dq = '成都' + ad
            Company = '成都' + pp1
            i = sa + cs1 + pp1 + sb1
            smalltext = '成都' + ad + i + zs1 + other1 + '.联系人:' + name1
            newstext = smalltext + ',' + '出厂日期:' + data + '.' + pp1 + sb1 + '.规格:' + rl1
            # dianji_i = '999'
            smalltext = newstext
            detail_title = i
            detail_smalltext = newstext
            detail_infotags = pp1
            detail_keyboard = sb1
            detail_titlepic = 'http://www.pictutu.com/img/ganxiji/' + str(titlepic_i) + '.jpg'
            jiage = choice(JG)
            sql = "insert into `phome_ecms_news` (`id`, `classid`, `onclick`, `newspath`, `keyboard`, `keyid`, `userid`, `username`, `ztid`, `checked`, `istop`, `truetime`, `ismember`, `dokey`, `userfen`, `isgood`, `titlefont`, `titleurl`, `filename`, `groupid`, `newstempid`, `plnum`, `firsttitle`, `isqf`, `totaldown`, `title`, `newstime`, `titlepic`, `closepl`, `havehtml`, `lastdotime`, `haveaddfen`, `infopfen`, `infopfennum`, `votenum`, `ftitle`, `smalltext`, `diggtop`, `stb`, `copyids`, `ttid`, `infotags`, `ispic`, `phone`) values ( %s, %s, %s, '%s', '%s', '', '1', 'admin', '', '1', '0', '%s', '0', '1', '0', '0', '', '', '%s', '0', '0', '0', '0', '0', '0', '%s', '%s', '%s', '0', '1', '%s', '0', '0', '0', '0', '', '%s', '0', '1', '1', '0', '%s', '1', %s);" % (g_id, ClassId, dianji_i, time.strftime("%Y-%m-%d", time.localtime()), detail_keyboard, int(time.time()), g_id, detail_title, int(time.time()), detail_titlepic, int(time.time()), detail_smalltext, detail_infotags, phone)
            # write to the database
            # sql = "INSERT INTO test(text,title) VALUES ('%s','%s')" % (sa, i)
            cursor.execute(sql)
            sql = "insert into `phome_ecms_news_data_1` (`id`, `classid`, `writer`, `befrom`, `newstext`, `company`, `qq`, `jiage`, `dq`, `pp`,`cs`,`sb`,`type`) values ( %s, %s, '%s', '%s', '%s', '%s', %s, '%s','%s','%s','%s','%s','%s');" % (g_id, ClassId, name1, Befrom, newstext, Company, QQ, jiage, dq, pp1, cs1, sb1, rl1)
            cursor.execute(sql)
            try:
                db.commit()
                n += 1
            except:
                db.rollback()
                print("insert error")
db.close()
print('Inserted ' + str(max) + ' records')
print('Highest completed ID: ' + str(total))
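The publish script splices values into SQL with %-formatting, so any quote in the generated text breaks the statement. MySQLdb also accepts a parameter tuple and escapes each value itself; a minimal sketch with illustrative connection settings and values, and a shortened column list:

# Sketch: parameterized insert; the driver escapes each value.
import MySQLdb

db = MySQLdb.connect(host="127.0.0.1", user="root", passwd="***", db="data", charset="utf8")
cursor = db.cursor()
sql = ("insert into `phome_ecms_news_data_1` "
       "(`id`, `classid`, `writer`, `befrom`, `newstext`) "
       "values (%s, %s, %s, %s, %s)")
cursor.execute(sql, (1, '13', '张先生', 'zgxd.net', '示例文本'))   # illustrative values
db.commit()
db.close()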
IT 0 1 59 days ago
admin
26
# -*- coding: utf-8 -*-
# Count the files under a folder and print one "Ln = path + '<name>'" line per file
import os

path = "D:\\down\\loupan\\shanghai\\"
count = 0
for fn in os.listdir(path):   # fn is the file name
    count = count + 1
    print("L" + str(count) + " = path + '" + fn + "'")
# print(count)
for i in range(1, count + 1):
    print 'L' + str(i) + ',',
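The hand-maintained counter can be replaced with enumerate, and the count with len; a minimal sketch:

# Sketch: same output without a manual counter.
import os

path = "D:\\down\\loupan\\shanghai\\"
names = os.listdir(path)
for n, fn in enumerate(names, 1):
    print("L%d = path + '%s'" % (n, fn))
print(len(names))    # file count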
IT 0 1 59 days ago
admin
38
# -*- coding: utf-8 -*-
import re
from bs4 import BeautifulSoup
import urllib2

url = "https://www.anjuke.com/chengdu/cm/"
header = {
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
    'Accept-Language': 'zh-CN,zh;q=0.8',
    'Cache-Control': 'no-cache',
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.98 Safari/537.36'
}
request = urllib2.Request(url, headers=header)
response = urllib2.urlopen(request, timeout=15)
data = response.read()
soup = BeautifulSoup(data, "html.parser")
# print soup.title.string
list = soup.find_all(class_="P2a")
# district links look like https://www.anjuke.com/chengdu/cm/<name>/
listurls = soup.find_all('a', href=re.compile('https://www.anjuke.com/chengdu/cm/[a-zA-Z]{4,15}/'))
for url in listurls:
    if not re.search('(.*zhoubian|p(\d+))', url['href']):   # skip "nearby" and paging links
        urls = url['href']
        name = url.get_text()
        request = urllib2.Request(urls, headers=header)
        response = urllib2.urlopen(request, timeout=15)
        pn = response.read()
        soup = BeautifulSoup(pn, "html.parser")
        pn = soup.find(class_="P4").get_text()   # pagination block
        pn = pn.encode('utf-8')
        pn = re.sub('上一页|下一页', '', pn)        # strip the "prev page" / "next page" labels
        pn = re.sub('\s', '', pn)
        print(pn)
        with open("D:\\down\\loupan\\chengdu\\city-1012.txt", "a") as f:
            f.write("['" + name.encode("utf8") + "','" + pn + "','" + url['href'].encode("utf8") + "'],")

# Sample pagination text: 12345678910  12345678910 12345678910
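Rather than eyeballing the concatenated pagination digits, the highest page number can be read off the P4 block's links; a sketch meant to drop into the script above after soup is built, assuming that block holds one <a> per page plus prev/next anchors:

# Sketch: numeric max page from the pagination links (reuses soup from above).
page_links = soup.find(class_="P4").find_all('a')
nums = [int(a.get_text()) for a in page_links if a.get_text().strip().isdigit()]
if nums:
    print(max(nums))    # highest page number found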
IT 0 0 59 days ago