{"id":9635,"date":"2018-07-22T11:52:18","date_gmt":"2018-07-22T02:52:18","guid":{"rendered":"https:\/\/www.itblog.jp\/?p=9635"},"modified":"2018-07-22T11:52:18","modified_gmt":"2018-07-22T02:52:18","slug":"scrapy%e3%81%a7%e3%82%af%e3%83%ad%e3%83%bc%e3%83%ab%e4%b8%ad%e3%81%ab%e3%83%aa%e3%83%b3%e3%82%af%e5%85%88%e3%81%ae%e3%83%9a%e3%83%bc%e3%82%b8%e3%81%8b%e3%82%89%e6%8a%bd%e5%87%ba%e3%81%97%e3%81%9f","status":"publish","type":"post","link":"https:\/\/www.itblog.jp\/?p=9635","title":{"rendered":"Scrapy\u3067\u30af\u30ed\u30fc\u30eb\u4e2d\u306b\u30ea\u30f3\u30af\u5148\u306e\u30da\u30fc\u30b8\u304b\u3089\u62bd\u51fa\u3057\u305f\u8981\u7d20\u3092\u8ffd\u52a0\u3059\u308b"},"content":{"rendered":"<p>Python\u306e\u30b9\u30af\u30ec\u30a4\u30d4\u30f3\u30b0\u306e\u30d5\u30ec\u30fc\u30e0\u30ef\u30fc\u30af\u300cScrapy\u300d\u3067\u3001\u4ee5\u4e0b\u306e\u3053\u3068\u3092\u5b9f\u88c5\u3057\u3088\u3046\u3068\u601d\u3044\u3001\u65b9\u6cd5\u3067\u884c\u304d\u8a70\u307e\u3063\u3066\u3057\u307e\u3044\u307e\u3057\u305f\u3002<\/p>\n<p>\u30b9\u30af\u30ec\u30a4\u30d4\u30f3\u30b0\u4e2d\u306b\u3001\u30ea\u30f3\u30af\u5148\u306e\u30da\u30fc\u30b8\u304b\u3089\u7279\u5b9a\u306e\u8981\u7d20\u3092\u53d6\u5f97\u3057\u3001\u5143\u306e\u51e6\u7406\u306b\u52a0\u3048\u308b<\/p>\n<p>\u8981\u3059\u308b\u306b\u3001\u4ee5\u4e0b\u306e\u3088\u3046\u306a\u30a4\u30e1\u30fc\u30b8\u3067\u3059\u3002<\/p>\n<pre class=\"lang:default decode:true \" >def parse(self, response):\r\n\r\nfor url in response.xpath('\u301c\u301c').extract():\r\n     \u524d\u51e6\u7406 \r\n     scrapy.Request(url, \u51e6\u74062)\r\n     \u5f8c\u51e6\u7406\r\n\r\ndef \u51e6\u74062(self, response):\r\n   
\u3000\u301c\u301c\u301c\u301c\u301c\u301c\u301c<\/pre>\n<p>\u4e0a\u8a18\u306e\u3088\u3046\u306a\u51e6\u7406\u306f\u3001Scrapy\u3067\u3088\u304f\u906d\u9047\u3057\u307e\u3059\u3002<\/p>\n<p>parse\u306e\u51e6\u7406\u4e2d\u306b\u3001\u30ea\u30f3\u30af\u5148\u306e\u30da\u30fc\u30b8\u304b\u3089\u8981\u7d20\u3092\u53d6\u5f97\u3057\u3001yield\u3067\u8fd4\u3057\u307e\u3059\u3002<\/p>\n<p>\u305f\u3060\u3057\u3001\u4e0a\u8a18\u306e\u8a18\u8ff0\u306f\u3001\u51e6\u74062\u3067\u8fd4\u3057\u305f\u8f9e\u66f8\u578b\u306e\u8981\u7d20\u3092\u3001\u305d\u306e\u307e\u307eRequest\u578b\u3067\u8fd4\u3059\u305f\u3081\u3001\u5358\u306b\u30ea\u30f3\u30af\u5148\u306e\u30da\u30fc\u30b8\u3067\u7279\u5b9a\u306e\u5024\u3060\u3051\u53d6\u5f97\u3057\u3066\u305d\u306e\u5f8c\u306e\u51e6\u7406\u3067\u305d\u306e\u5024\u3092\u4f7f\u3044\u305f\u3044\u5834\u5408\u306b\u306f\u3001\u4e0a\u624b\u304f\u51e6\u7406\u3059\u308b\u3053\u3068\u304c\u3067\u304d\u307e\u305b\u3093\u3002<\/p>\n<p>\u3069\u3046\u89e3\u6c7a\u3057\u305f\u3089\u3044\u3044\u304b\u3092\u8003\u3048\u3066\u307f\u305f\u306e\u3067\u3059\u304c\u3001\u7d50\u8ad6\u3068\u3057\u3066\u306f\u3001scrapy.Request\u3092\u4f7f\u7528\u3057\u306a\u3044\u3068\u3044\u3046\u3053\u3068\u3067\u89e3\u6c7a\u3057\u307e\u3057\u305f\u3002<\/p>\n<p>URL\u306f\u78ba\u5b9a\u3057\u3066\u3044\u3066\u3001\u30ea\u30f3\u30af\u5148\u306e\u30da\u30fc\u30b8\u306e\u7279\u5b9a\u306e\u8981\u7d20\u3092\u53d6\u5f97\u3057\u305f\u3044\u3060\u3051\u3067\u3042\u308c\u3070\u3001\u4ed6\u306e\u30e9\u30a4\u30d6\u30e9\u30ea\u306eBeautiful 
Soup\u306a\u3069\u3092\u4f7f\u3046\u3053\u3068\u3067\u3082\u89e3\u6c7a\u3067\u304d\u307e\u3059\u3002<\/p>\n<p>\u4f8b\u3048\u3070\u4ee5\u4e0b\u306e\u3088\u3046\u306a\u51e6\u7406\u3092\u4f7f\u7528\u3059\u308b\u3053\u3068\u3067\u30ea\u30f3\u30af\u5148\u30da\u30fc\u30b8\u306e\u30bf\u30a4\u30c8\u30eb\u30bf\u30b0\u3092\u53d6\u5f97\u3067\u304d\u307e\u3059\u3002<\/p>\n<p>\u7121\u7406\u306bScrapy\u306eRequest\u30aa\u30d6\u30b8\u30a7\u30af\u30c8\u3092\u4f7f\u3063\u3066\u53d6\u5f97\u3059\u308b\u5fc5\u8981\u306f\u306a\u304f\u3001\u51e6\u7406\u306e\u4e2d\u3067\u5225\u306e\u30e9\u30a4\u30d6\u30e9\u30ea\u3092\u4f7f\u7528\u3059\u308b\u3053\u3068\u3067\u4e0a\u624b\u304f\u89e3\u6c7a\u3067\u304d\u307e\u3057\u305f\u3002<\/p>\n<pre class=\"lang:default decode:true \" >import urllib.request\r\nfrom bs4 import BeautifulSoup\r\n\r\nhtml = urllib.request.urlopen(url)\r\nsoup = BeautifulSoup(html, 'html.parser')\r\nfor title in soup.find_all('title'):\r\n    title = title.text<\/pre>\n","protected":false},"excerpt":{"rendered":"<p>Python\u306e\u30b9\u30af\u30ec\u30a4\u30d4\u30f3\u30b0\u306e\u30d5\u30ec\u30fc\u30e0\u30ef\u30fc\u30af\u300cScrapy\u300d\u3067\u3001\u4ee5\u4e0b\u306e\u3053\u3068\u3092\u5b9f\u88c5\u3057\u3088\u3046\u3068\u601d\u3044\u3001\u65b9\u6cd5\u3067\u884c\u304d\u8a70\u307e\u3063\u3066\u3057\u307e\u3044\u307e\u3057\u305f\u3002 \u30b9\u30af\u30ec\u30a4\u30d4\u30f3\u30b0\u4e2d\u306b\u3001\u30ea\u30f3\u30af\u5148\u306e\u30da\u30fc\u30b8\u304b\u3089\u7279\u5b9a\u306e\u8981\u7d20\u3092\u53d6\u5f97\u3057\u3001\u5143\u306e\u51e6\u7406\u306b\u52a0\u3048\u308b \u8981\u3059\u308b\u306b\u3001\u4ee5\u4e0b 
[&hellip;]<\/p>\n","protected":false},"author":1,"featured_media":0,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[150,131],"tags":[],"class_list":["post-9635","post","type-post","status-publish","format-standard","hentry","category-python-","category-131"],"_links":{"self":[{"href":"https:\/\/www.itblog.jp\/index.php?rest_route=\/wp\/v2\/posts\/9635","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/www.itblog.jp\/index.php?rest_route=\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/www.itblog.jp\/index.php?rest_route=\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/www.itblog.jp\/index.php?rest_route=\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/www.itblog.jp\/index.php?rest_route=%2Fwp%2Fv2%2Fcomments&post=9635"}],"version-history":[{"count":0,"href":"https:\/\/www.itblog.jp\/index.php?rest_route=\/wp\/v2\/posts\/9635\/revisions"}],"wp:attachment":[{"href":"https:\/\/www.itblog.jp\/index.php?rest_route=%2Fwp%2Fv2%2Fmedia&parent=9635"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/www.itblog.jp\/index.php?rest_route=%2Fwp%2Fv2%2Fcategories&post=9635"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/www.itblog.jp\/index.php?rest_route=%2Fwp%2Fv2%2Ftags&post=9635"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}