add scrapy
This commit is contained in:
12
quotes.jsonl
Normal file
12
quotes.jsonl
Normal file
@@ -0,0 +1,12 @@
|
|||||||
|
{"author": "Jane Austen", "text": "\u201cThe person, be it gentleman or lady, who has not pleasure in a good novel, must be intolerably stupid.\u201d"}
|
||||||
|
{"author": "Steve Martin", "text": "\u201cA day without sunshine is like, you know, night.\u201d"}
|
||||||
|
{"author": "Garrison Keillor", "text": "\u201cAnyone who thinks sitting in church can make you a Christian must also think that sitting in a garage can make you a car.\u201d"}
|
||||||
|
{"author": "Jim Henson", "text": "\u201cBeauty is in the eye of the beholder and it may be necessary from time to time to give a stupid or misinformed beholder a black eye.\u201d"}
|
||||||
|
{"author": "Charles M. Schulz", "text": "\u201cAll you need is love. But a little chocolate now and then doesn't hurt.\u201d"}
|
||||||
|
{"author": "Suzanne Collins", "text": "\u201cRemember, we're madly in love, so it's all right to kiss me anytime you feel like it.\u201d"}
|
||||||
|
{"author": "Charles Bukowski", "text": "\u201cSome people never go crazy. What truly horrible lives they must lead.\u201d"}
|
||||||
|
{"author": "Terry Pratchett", "text": "\u201cThe trouble with having an open mind, of course, is that people will insist on coming along and trying to put things in it.\u201d"}
|
||||||
|
{"author": "Dr. Seuss", "text": "\u201cThink left and think right and think low and think high. Oh, the thinks you can think up if only you try!\u201d"}
|
||||||
|
{"author": "George Carlin", "text": "\u201cThe reason I talk to myself is because I\u2019m the only one whose answers I accept.\u201d"}
|
||||||
|
{"author": "W.C. Fields", "text": "\u201cI am free of all prejudice. I hate everyone equally. \u201d"}
|
||||||
|
{"author": "Jane Austen", "text": "\u201cA lady's imagination is very rapid; it jumps from admiration to love, from love to matrimony in a moment.\u201d"}
|
||||||
19
quotes_spider.py
Normal file
19
quotes_spider.py
Normal file
@@ -0,0 +1,19 @@
|
|||||||
|
import scrapy
|
||||||
|
|
||||||
|
class QuoteSpider(scrapy.Spider):
    """Scrape the author and text of each quote listed under the "humor" tag.

    The crawl starts from the single URL in ``start_urls`` and keeps going
    for as long as a page exposes a "next" pagination link.
    """

    name = "quotes"
    start_urls = ['http://quotes.toscrape.com/tag/humor/']

    def parse(self, response):
        """Yield one item per quote on the page, then a request for the next page.

        Each item is a dict with the keys ``'author'`` and ``'text'``, taken
        from the first matching text node of the respective CSS selector.
        When the page has no ``li.next`` link, the generator simply ends.
        """
        for quote_block in response.css('div.quote'):
            author = quote_block.css('small.author::text').get()
            text = quote_block.css('span.text::text').get()
            # NOTE: tag extraction is currently disabled; the selector was
            # quote.css('div.tags a.tag::text').getall() under a 'tags' key.
            yield {'author': author, 'text': text}

        next_href = response.css('li.next a::attr(href)').get()
        if next_href is None:
            # Last page: nothing further to schedule.
            return
        # Re-enter this same callback for the following page.
        yield response.follow(next_href, callback=self.parse)
|
||||||
@@ -0,0 +1,2 @@
|
|||||||
|
requests~=2.31.0
|
||||||
|
ipython~=8.24.0
|
||||||
Reference in New Issue
Block a user