def parse(self, response):
    """Print one ``WeatherItem`` per forecast entry found in the page.

    The page is searched for the first ``div`` with class ``c7d`` and, inside
    it, the first ``ul``. Every ``li`` below that list becomes one item whose
    fields are filled from the text of the first matching child tag:

    * ``day``     <- ``h1``
    * ``wea``     <- ``p``
    * ``highest`` <- ``span``
    * ``lowest``  <- ``i``

    A field is left unset when its tag is missing from the entry.

    Args:
        response: The downloaded page; only ``response.text`` is read.

    Returns:
        None. Items are printed, not yielded.
    """
    soup = BeautifulSoup(response.text, 'lxml')

    # Narrow the search step by step; bail out quietly if the expected
    # container is not on the page.
    div_7d = soup.find('div', class_='c7d')
    if div_7d is None:
        return
    ul = div_7d.find('ul')
    if ul is None:
        return

    # Item field -> tag whose text supplies it, in the order they are filled.
    field_tags = (
        ('day', 'h1'),
        ('wea', 'p'),
        ('highest', 'span'),
        ('lowest', 'i'),
    )

    for li in ul.find_all('li'):
        # A fresh item per entry: reusing one item would let a value from a
        # previous entry (e.g. 'highest') leak into an entry that lacks it.
        weather = WeatherItem()
        for field, tag_name in field_tags:
            tag = li.find(tag_name)
            # Any of the tags may be absent; skip the field instead of
            # raising AttributeError on None.
            if tag is not None:
                weather[field] = tag.get_text()
        print(weather)