{"id":618,"date":"2023-02-25T18:19:20","date_gmt":"2023-02-25T09:19:20","guid":{"rendered":"https:\/\/deskplate.net\/blog\/?p=618"},"modified":"2023-02-25T18:24:24","modified_gmt":"2023-02-25T09:24:24","slug":"google-colaboratory-install-chrome-chrome-driver-selenium","status":"publish","type":"post","link":"https:\/\/deskplate.net\/blog\/archives\/618","title":{"rendered":"Google Colaboratory install Chrome, Chrome Driver, Selenium"},"content":{"rendered":"<p>\u6700\u521d\u306e\u30b3\u30fc\u30c9\u30d6\u30ed\u30c3\u30af<br \/>\n%%shell<\/p>\n<p>cat &gt; \/etc\/apt\/sources.list.d\/debian.list &lt;&lt;&#8216;EOF&#8217;<br \/>\ndeb [arch=amd64 signed-by=\/usr\/share\/keyrings\/debian-buster.gpg] http:\/\/deb.debian.org\/debian buster main<br \/>\ndeb [arch=amd64 signed-by=\/usr\/share\/keyrings\/debian-buster-updates.gpg] http:\/\/deb.debian.org\/debian buster-updates main<br \/>\ndeb [arch=amd64 signed-by=\/usr\/share\/keyrings\/debian-security-buster.gpg] http:\/\/deb.debian.org\/debian-security buster\/updates main<br \/>\nEOF<\/p>\n<p>apt-key adv &#8211;keyserver keyserver.ubuntu.com &#8211;recv-keys DCC9EFBF77E11517<br \/>\napt-key adv &#8211;keyserver keyserver.ubuntu.com &#8211;recv-keys 648ACFD622F3D138<br \/>\napt-key adv &#8211;keyserver keyserver.ubuntu.com &#8211;recv-keys 112695A0E562B32A<br \/>\napt-key export 77E11517 | gpg &#8211;dearmour -o \/usr\/share\/keyrings\/debian-buster.gpg<br \/>\napt-key export 22F3D138 | gpg &#8211;dearmour -o \/usr\/share\/keyrings\/debian-buster-updates.gpg<br \/>\napt-key export E562B32A | gpg &#8211;dearmour -o \/usr\/share\/keyrings\/debian-security-buster.gpg<\/p>\n<p>cat &gt; \/etc\/apt\/preferences.d\/chromium.pref &lt;&lt; &#8216;EOF&#8217;<br \/>\nPackage: *<br \/>\nPin: release a=eoan<br \/>\nPin-Priority: 500<\/p>\n<p>Package: *<br \/>\nPin: origin &#8220;deb.debian.org&#8221;<br \/>\nPin-Priority: 300<\/p>\n<p>Package: chromium*<br \/>\nPin: origin &#8220;deb.debian.org&#8221;<br \/>\nPin-Priority: 700<br 
\/>\nEOF<\/p>\n<p>\u6b21\u30d6\u30ed\u30c3\u30af<br \/>\n!apt-get update<br \/>\n!apt-get install chromium chromium-driver<br \/>\n!pip install -q selenium<\/p>\n<p>\u78ba\u8a8d<br \/>\nfrom selenium import webdriver<br \/>\nfrom selenium.webdriver.common.by import By<br \/>\nfrom bs4 import BeautifulSoup<br \/>\nimport time<br \/>\nimport pandas as pd<\/p>\n<p>try:<br \/>\n    # \u30d6\u30e9\u30a6\u30b6\u3092headless\u30e2\u30fc\u30c9\u5b9f\u884c<br \/>\n    options = webdriver.ChromeOptions()<br \/>\n    #\u30d8\u30c3\u30c9\u30ec\u30b9\u30e2\u30fc\u30c9\uff08\u30d0\u30c3\u30af\u30b0\u30e9\u30a6\u30f3\u30c9\u3067\u8d77\u52d5\uff09\u3067\u5b9f\u884c\u3002\u30b3\u30e9\u30dc\u306e\u5834\u5408\u3001\u5fc5\u9808\u3002<br \/>\n    options.add_argument(&#8216;&#8211;headless&#8217;)<br \/>\n    #\u30b5\u30f3\u30c9\u30dc\u30c3\u30af\u30b9\u30e2\u30fc\u30c9\u306e\u89e3\u9664\u3002\u3053\u308c\u3082\u5fc5\u9808\u3002<br \/>\n    options.add_argument(&#8216;&#8211;no-sandbox&#8217;)<br \/>\n    #\u3053\u308c\u3082\u8a2d\u5b9a\u3057\u305f\u65b9\u304c\u3088\u3044\u3002<br \/>\n    options.add_argument(&#8216;&#8211;disable-dev-shm-usage&#8217;)<\/p>\n<p>    #\u30a4\u30f3\u30b9\u30bf\u30f3\u30b9\u5316<br \/>\n    driver = webdriver.Chrome(&#8216;chromedriver&#8217;, options=options)<br \/>\n    #\u6307\u5b9a\u3057\u305f\u30c9\u30e9\u30a4\u30d0\u30fc\u304c\u898b\u3064\u304b\u308b\u307e\u3067\u5f85\u6a5f<br \/>\n    driver.implicitly_wait(10)<\/p>\n<p>    base_url = &#8220;https:\/\/www.amazon.co.jp\/gp\/new-releases\/books\/466298\/ref=zg_bsnr_unv_books_2_492350_1&#8221;<br \/>\n    driver.get(base_url)<br \/>\n    time.sleep(5)<\/p>\n<p>    # scroll<br \/>\n    while len(driver.find_elements(By.ID, &#8220;gridItemRoot&#8221;)) &lt; 50:<br \/>\n        time.sleep(5)<br \/>\n        driver.execute_script(&#8220;window.scrollBy(0, 3000);&#8221;) <\/p>\n<p>    # \u30ec\u30d3\u30e5\u30fc\u3054\u3068\u306e\u8981\u7d20<br \/>\n    prosuct_elements = driver.find_elements(By.ID, 
&#8220;gridItemRoot&#8221;)<br \/>\n    results = list()<br \/>\n    for i_section in prosuct_elements:<br \/>\n        result_row = dict()<br \/>\n        # url<br \/>\n        a_element = i_section.find_element(By.CSS_SELECTOR, &#8220;div &gt; div:nth-child(2) &gt; div &gt; a:nth-child(2)&#8221;)<br \/>\n        result_row[&#8220;url&#8221;] = a_element.get_attribute(&#8220;href&#8221;)<br \/>\n        # \u88fd\u54c1\u540d<br \/>\n        result_row[&#8220;name&#8221;] = a_element.text<\/p>\n<p>        print(result_row)<\/p>\n<p>        results.append(result_row)<br \/>\nfinally:<br \/>\n    driver.quit()<\/p>\n<p>df = pd.DataFrame(results)<br \/>\nprint(df.head())<\/p>\n","protected":false},"excerpt":{"rendered":"<p>\u6700\u521d\u306e\u30b3\u30fc\u30c9\u30d6\u30ed\u30c3\u30af %%shell c<\/p>\n","protected":false},"author":1,"featured_media":0,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[50,39],"tags":[],"class_list":["post-618","post","type-post","status-publish","format-standard","hentry","category-google-colabory","category-python"],"_links":{"self":[{"href":"https:\/\/deskplate.net\/blog\/wp-json\/wp\/v2\/posts\/618","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/deskplate.net\/blog\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/deskplate.net\/blog\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/deskplate.net\/blog\/wp-json\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/deskplate.net\/blog\/wp-json\/wp\/v2\/comments?post=618"}],"version-history":[{"count":4,"href":"https:\/\/deskplate.net\/blog\/wp-json\/wp\/v2\/posts\/618\/revisions"}],"predecessor-version":[{"id":622,"href":"https:\/\/deskplate.net\/blog\/wp-json\/wp\/v2\/posts\/618\/revisions\/622"}],"wp:attachment":[{"href":"https:\/\/deskplate.net\/blog\/wp-json\/wp\/v2\/media?parent=618"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/deskplat
e.net\/blog\/wp-json\/wp\/v2\/categories?post=618"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/deskplate.net\/blog\/wp-json\/wp\/v2\/tags?post=618"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}