snipt

Ctrl+h for KB shortcuts

Python

Get similar image URLs from Google

__author__ = 'omriko'
from selenium import webdriver
import urllib
import urlparse

def get_similar_images_urls(browser, image_url):
    """Collect the source-page links of Google's "Visually similar images".

    Opens Google's search-by-image page for ``image_url`` in ``browser``,
    follows the "Visually similar images" link and reads the ``imgrefurl``
    query parameter from every result anchor.

    Args:
        browser: a selenium WebDriver (or compatible object). Only ``get``,
            ``find_element_by_link_text`` and
            ``find_elements_by_css_selector`` are called on it.
        image_url: URL of the image to search for; it is URL-encoded here,
            so pass it unencoded.

    Returns:
        A list with one entry per result anchor that carries an
        ``imgrefurl`` parameter. Each entry is the list of values
        ``parse_qs`` yields for that parameter (normally a single URL),
        e.g. ``[['http://example.com/page']]``. Anchors without an ``href``
        or without ``imgrefurl`` are skipped.

    Raises:
        Whatever ``browser`` raises, e.g. when the "Visually similar images"
        link is not present on the results page.
    """
    # urllib.quote_plus / urlparse live in urllib.parse on Python 3; import
    # locally so this function works on both interpreter lines.
    try:
        from urllib.parse import parse_qs, quote_plus, urlparse
    except ImportError:  # Python 2
        from urllib import quote_plus
        from urlparse import parse_qs, urlparse

    # prepare image url
    google_image_search_url = "https://www.google.co.il/searchbyimage?hl=en-IL&image_url="
    image_search_url = google_image_search_url + quote_plus(image_url)

    # open google base search
    browser.get(image_search_url)

    # click on the "Visually similar images"
    # NOTE(review): the find_element(s)_by_* helpers are gone in recent
    # Selenium 4 releases -- confirm the installed version before upgrading.
    browser.find_element_by_link_text('Visually similar images').click()

    # get results list
    results = browser.find_elements_by_css_selector(".rg_di.rg_el.ivg-i a")

    links = []

    # iterate results
    for result in results:
        # get link from result object
        link = result.get_attribute('href')
        if not link:
            # anchor without a target: nothing to parse
            continue

        # link will be in the format: http://www.google.co.il/imgres?imgurl=...&imgrefurl=...&h=...&w=...&tbnid=...&docid=...&hl=...&ei=...&tbm=...
        # we only need imgrefurl, lets fetch it
        page_link = parse_qs(urlparse(link).query).get('imgrefurl')
        if page_link:
            # skip anchors lacking imgrefurl instead of raising KeyError
            links.append(page_link)

    return links

# Run as a script: look up the image URL given on the command line.
if __name__ == "__main__":
    import sys

    # the image URL to search for is taken from the command line
    if len(sys.argv) != 2:
        sys.exit("usage: python <script> IMAGE_URL")
    image_url = sys.argv[1]

    # open browser (swap Firefox for the webdriver of your choice)
    browser = webdriver.Firefox()
    try:
        # fetch links
        links = get_similar_images_urls(browser, image_url)
    finally:
        # always close the browser, even when the lookup fails
        browser.quit()

    for entry in links:
        print(entry)

Description

Get a list of links to similar images from Google using Selenium (you can change Firefox to your browser of choice).
https://snipt.net/embed/075ee7d49022f445864058af129ef118/
/raw/075ee7d49022f445864058af129ef118/
075ee7d49022f445864058af129ef118
python
Python
41
2019-08-23T13:35:34
True
False
False
Oct 13, 2015 at 06:27 AM
/api/public/snipt/141655/
get-similar-images-urls-from-google
<table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><a href="#L-1"> 1</a> <a href="#L-2"> 2</a> <a href="#L-3"> 3</a> <a href="#L-4"> 4</a> <a href="#L-5"> 5</a> <a href="#L-6"> 6</a> <a href="#L-7"> 7</a> <a href="#L-8"> 8</a> <a href="#L-9"> 9</a> <a href="#L-10">10</a> <a href="#L-11">11</a> <a href="#L-12">12</a> <a href="#L-13">13</a> <a href="#L-14">14</a> <a href="#L-15">15</a> <a href="#L-16">16</a> <a href="#L-17">17</a> <a href="#L-18">18</a> <a href="#L-19">19</a> <a href="#L-20">20</a> <a href="#L-21">21</a> <a href="#L-22">22</a> <a href="#L-23">23</a> <a href="#L-24">24</a> <a href="#L-25">25</a> <a href="#L-26">26</a> <a href="#L-27">27</a> <a href="#L-28">28</a> <a href="#L-29">29</a> <a href="#L-30">30</a> <a href="#L-31">31</a> <a href="#L-32">32</a> <a href="#L-33">33</a> <a href="#L-34">34</a> <a href="#L-35">35</a> <a href="#L-36">36</a> <a href="#L-37">37</a> <a href="#L-38">38</a> <a href="#L-39">39</a> <a href="#L-40">40</a> <a href="#L-41">41</a></pre></div></td><td class="code"><div class="highlight"><pre><span></span><span id="L-1"><a name="L-1"></a><span class="n">__author__</span> <span class="o">=</span> <span class="s1">&#39;omriko&#39;</span> </span><span id="L-2"><a name="L-2"></a><span class="kn">from</span> <span class="nn">selenium</span> <span class="kn">import</span> <span class="n">webdriver</span> </span><span id="L-3"><a name="L-3"></a><span class="kn">import</span> <span class="nn">urllib</span> </span><span id="L-4"><a name="L-4"></a><span class="kn">import</span> <span class="nn">urlparse</span> </span><span id="L-5"><a name="L-5"></a> </span><span id="L-6"><a name="L-6"></a><span class="k">def</span> <span class="nf">get_similar_images_urls</span><span class="p">(</span><span class="n">browser</span><span class="p">,</span> <span class="n">image_url</span><span class="p">):</span> </span><span id="L-7"><a name="L-7"></a> <span class="c1"># prepare image url</span> </span><span id="L-8"><a 
name="L-8"></a> <span class="n">google_image_search_url</span><span class="o">=</span> <span class="s2">&quot;https://www.google.co.il/searchbyimage?hl=en-IL&amp;image_url=&quot;</span> </span><span id="L-9"><a name="L-9"></a> <span class="n">image_url_encoded</span> <span class="o">=</span> <span class="n">urllib</span><span class="o">.</span><span class="n">quote_plus</span><span class="p">(</span><span class="n">image_url</span><span class="p">)</span> </span><span id="L-10"><a name="L-10"></a> <span class="n">image_search_url</span><span class="o">=</span> <span class="n">google_image_search_url</span> <span class="o">+</span> <span class="n">image_url_encoded</span> </span><span id="L-11"><a name="L-11"></a> </span><span id="L-12"><a name="L-12"></a> <span class="c1"># open google base search</span> </span><span id="L-13"><a name="L-13"></a> <span class="n">browser</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">image_search_url</span><span class="p">)</span> </span><span id="L-14"><a name="L-14"></a> </span><span id="L-15"><a name="L-15"></a> <span class="c1"># click on the &quot;Visually similar images&quot;</span> </span><span id="L-16"><a name="L-16"></a> <span class="n">browser</span><span class="o">.</span><span class="n">find_element_by_link_text</span><span class="p">(</span><span class="s1">&#39;Visually similar images&#39;</span><span class="p">)</span><span class="o">.</span><span class="n">click</span><span class="p">()</span> </span><span id="L-17"><a name="L-17"></a> </span><span id="L-18"><a name="L-18"></a> <span class="c1"># get results list</span> </span><span id="L-19"><a name="L-19"></a> <span class="n">results</span> <span class="o">=</span> <span class="n">browser</span><span class="o">.</span><span class="n">find_elements_by_css_selector</span><span class="p">(</span><span class="s2">&quot;.rg_di.rg_el.ivg-i a&quot;</span><span class="p">)</span> </span><span id="L-20"><a name="L-20"></a> 
</span><span id="L-21"><a name="L-21"></a> <span class="n">links</span> <span class="o">=</span> <span class="p">[]</span> </span><span id="L-22"><a name="L-22"></a> </span><span id="L-23"><a name="L-23"></a> <span class="c1"># iterate results</span> </span><span id="L-24"><a name="L-24"></a> <span class="k">for</span> <span class="n">result</span> <span class="ow">in</span> <span class="n">results</span><span class="p">:</span> </span><span id="L-25"><a name="L-25"></a> <span class="c1"># get link from result object</span> </span><span id="L-26"><a name="L-26"></a> <span class="n">link</span> <span class="o">=</span> <span class="n">result</span><span class="o">.</span><span class="n">get_attribute</span><span class="p">(</span><span class="s1">&#39;href&#39;</span><span class="p">)</span> </span><span id="L-27"><a name="L-27"></a> </span><span id="L-28"><a name="L-28"></a> <span class="c1"># link will be in the format: http://www.google.co.il/imgres?imgurl=...&amp;imgrefurl=...&amp;h=...&amp;w=...&amp;tbnid=...&amp;docid=...&amp;hl=...&amp;ei=...&amp;tbm=...</span> </span><span id="L-29"><a name="L-29"></a> <span class="c1"># we only need imagefurl, lets fetch it</span> </span><span id="L-30"><a name="L-30"></a> <span class="n">parsed_link</span> <span class="o">=</span> <span class="n">urlparse</span><span class="o">.</span><span class="n">urlparse</span><span class="p">(</span><span class="n">link</span><span class="p">)</span> </span><span id="L-31"><a name="L-31"></a> <span class="n">page_link</span> <span class="o">=</span> <span class="n">urlparse</span><span class="o">.</span><span class="n">parse_qs</span><span class="p">(</span><span class="n">parsed_link</span><span class="o">.</span><span class="n">query</span><span class="p">)[</span><span class="s1">&#39;imgrefurl&#39;</span><span class="p">]</span> </span><span id="L-32"><a name="L-32"></a> </span><span id="L-33"><a name="L-33"></a> <span class="n">links</span><span class="o">.</span><span 
class="n">append</span><span class="p">(</span><span class="n">page_link</span><span class="p">)</span> </span><span id="L-34"><a name="L-34"></a> </span><span id="L-35"><a name="L-35"></a> <span class="k">return</span> <span class="n">links</span> </span><span id="L-36"><a name="L-36"></a> </span><span id="L-37"><a name="L-37"></a><span class="c1"># open browser in URL</span> </span><span id="L-38"><a name="L-38"></a><span class="n">browser</span> <span class="o">=</span> <span class="n">webdriver</span><span class="o">.</span><span class="n">Firefox</span><span class="p">()</span> </span><span id="L-39"><a name="L-39"></a> </span><span id="L-40"><a name="L-40"></a><span class="c1"># fetch links</span> </span><span id="L-41"><a name="L-41"></a><span class="n">links</span> <span class="o">=</span> <span class="n">get_similar_images_urls</span><span class="p">(</span><span class="n">browser</span><span class="p">,</span> <span class="n">image_url</span><span class="p">)</span> </span></pre></div> </td></tr></table>
google, image, python, selenium
--- 
+++ 
@@ -0,0 +1,41 @@
+__author__ = 'omriko'
+from selenium import webdriver
+import urllib
+import urlparse
+
+def get_similar_images_urls(browser, image_url):
+    # prepare image url
+    google_image_search_url= "https://www.google.co.il/searchbyimage?hl=en-IL&image_url="
+    image_url_encoded = urllib.quote_plus(image_url)
+    image_search_url= google_image_search_url + image_url_encoded
+
+    # open google base search
+    browser.get(image_search_url)
+
+    # click on the "Visually similar images"
+    browser.find_element_by_link_text('Visually similar images').click()
+
+    # get results list
+    results = browser.find_elements_by_css_selector(".rg_di.rg_el.ivg-i a")
+
+    links = []
+
+    # iterate results
+    for result in results:
+        # get link from result object
+        link = result.get_attribute('href')
+
+        # link will be in the format: http://www.google.co.il/imgres?imgurl=...&imgrefurl=...&h=...&w=...&tbnid=...&docid=...&hl=...&ei=...&tbm=...
+        # we only need imagefurl, lets fetch it
+        parsed_link = urlparse.urlparse(link)
+        page_link = urlparse.parse_qs(parsed_link.query)['imgrefurl']
+
+        links.append(page_link)
+
+    return links
+
+# open browser in URL
+browser = webdriver.Firefox()
+
+# fetch links
+links = get_similar_images_urls(browser, image_url)
  • omriko
  • 3 years, 10 months ago