源码:
"""Scrape rental listings from bj.lianjia.com and store them in MySQL.

For each result page the script downloads the HTML, locates the listing
cards under ``<ul id="house-lst">``, extracts six text fields per card and
hands them to ``MysqlConnect.exec_data`` together with a parameterized
INSERT statement.
"""
import requests
from lxml import etree
from my_mysql import MysqlConnect


# NOTE(review): connection settings are hard-coded here; consider loading
# them from configuration or the environment instead of source control.
mc = MysqlConnect('127.0.0.1', 'root', '123456', 'homework')
sql = 'insert into lianjia(title,addr,shape,area,dire,price) values(%s,%s,%s,%s,%s,%s)'

# Seconds to wait for the server before giving up; without a timeout
# requests.get() can block indefinitely on a stalled connection.
REQUEST_TIMEOUT = 10

# XPath of each field relative to a listing <li>, in the same order as the
# columns named in ``sql``.
FIELD_XPATHS = (
    './div[2]/h2/a',                         # title
    './div[2]/div[1]/div[1]/a/span',         # addr
    './div[2]/div[1]/div[1]/span[1]/span',   # shape
    './div[2]/div[1]/div[1]/span[2]',        # area
    './div[2]/div[1]/div[1]/span[3]',        # dire
    './div[2]/div[2]/div[1]/span',           # price
)

# Three pages are fetched, as before, but starting at 1 rather than 0.
# NOTE(review): the site's "pg{n}" pagination appears to be 1-based, so
# "pg0" would not address a distinct page — confirm against the live site.
for page in range(1, 4):
    url = 'https://bj.lianjia.com/zufang/pg{}rp2rp1/'.format(page)
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    # Fail loudly on an HTTP error instead of parsing an error page and
    # silently storing nothing.
    response.raise_for_status()

    html = etree.HTML(response.text)
    li_list = html.xpath('//ul[@id="house-lst"]/li')

    for li_ele in li_list:
        matches = [li_ele.xpath(path) for path in FIELD_XPATHS]
        # A card that lacks any of the expected nodes would previously
        # raise IndexError and abort the whole run; skip it instead.
        if not all(matches):
            continue

        data = tuple(found[0].text for found in matches)
        print(data)
        mc.exec_data(sql, data)