snipt

Ctrl+h for KB shortcuts
Notice: Snipt is moving away from free accounts on May 1st, 2014. Read more about the transition here.
#8939

Python

Extracting toll fares from http://www.plus.com.my/miniquery/index.asp.

#!/usr/bin/env python

import urllib, httplib, re, sys
from time import strftime

# Location codes in alphabetical order. The main loop queries every
# ordered pair of these as (start, end).
tolls = '''
	AHT AKH ASS ASU
	BBR BDR BGS BKB
	BKH BKM BKR BRG
	BRT BSP BTR BTS
	BTT BTU CKJ DMR
	EBS EBU GCE GPG
	GRN HKG HSB IPS
	JBC JLD JLP JRU
	JSN JTR JWI KBS
	KDR KJG KKS KLA
	KLI KPS LBB LKT
	LMK LNS MAC MBU
	NLI PDG PDO PGH
	PHT PLG PLI PPM
	PSR PTJ RAW SAT
	SBG SBI SBN SDK
	SEA SGB SGD SGR
	SHA SKD SKI SNU
	SPP SPR SPS SPU
	STA SWG TGK TGM
	TJK TPH TPU UPM
	USJ YPS YPU
'''.split()

# Retrieve session ID
def getCookie():
	"""Request /index.asp and return the session cookie the server sets.

	Returns the first ';'-separated segment of the Set-Cookie response
	header, ready to be sent back as a Cookie request header.

	Raises RuntimeError when the response carries no Set-Cookie header.
	"""
	http = httplib.HTTPConnection('www.plus.com.my', 80)
	try:
		http.request('GET', '/index.asp')
		response = http.getresponse()
		header = response.getheader('set-cookie')
	finally:
		# Release the socket even when the request or response fails.
		http.close()

	if header is None:
		# Without this check the failure would surface as an opaque
		# AttributeError on None.split().
		raise RuntimeError('No Set-Cookie header in response from www.plus.com.my')

	# Keep only the part before the first ';'.
	return header.split(';')[0]

# Get HTML output
def getData(start, end, vclass, cookie):
	"""POST one fare query and return the HTML of the answer.

	start  -- code sent as the 'startloc' form field.
	end    -- code sent as the 'endloc' form field.
	vclass -- class number sent as the 'vclass' form field.
	cookie -- value sent in the Cookie request header.

	Returns the response body when the status is 200, otherwise False.
	"""
	headers = {
		'Content-type': 'application/x-www-form-urlencoded',
		'Cookie' : cookie,
	}
	body = urllib.urlencode({'startloc' : start, 'endloc' : end, 'vclass' : vclass})

	http = httplib.HTTPConnection('www.plus.com.my', 80)
	try:
		http.request('POST', '/miniquery/fare_details.asp', body, headers)
		response = http.getresponse()
		if response.status != 200:
			return False
		return response.read()
	finally:
		# Runs on every exit path, including exceptions, so the
		# connection is never left open.
		http.close()

def process(start, end, f, log):
	"""Fetch all fares for one (start, end) pair and append a CSV row to f.

	start, end -- indexes into the module-level tolls list.
	f          -- writable file object receiving the CSV row.
	log        -- writable file object receiving error lines.

	The row holds, in order: start code, end code, the class 1 normal,
	off-peak and festive rates, the normal rate for classes 2 to 5, and
	the distance taken from the last page fetched. Every field is wrapped
	in double quotes.

	Nothing is written to f when any request fails or a page cannot be
	parsed; a line is written to log instead and the function returns, so
	the caller's loop carries on with the next pair.
	"""
	origin, destination = tolls[start], tolls[end]

	def log_error(message):
		# %H (24-hour clock) is used because a 12-hour %I stamp without
		# AM/PM cannot tell morning from afternoon.
		stamp = strftime('%Y-%m-%d %H:%M:%S')
		log.write('%s %s (%s, %s)\n' % (stamp, message, origin, destination))
		log.flush()

	try:
		record = [origin, destination]

		# Class 1: the only page from which off-peak and festive rates are read.
		data = getData(origin, destination, 1, cookie)
		if data is False:
			log_error('HTTP error')
			return

		record.append(normal.findall(data)[0])
		record.append(offpeak.findall(data)[0])
		record.append(festive.findall(data)[0])

		# Class 2 ~ 5: only the normal rate is collected.
		for vclass in xrange(2, 6):
			data = getData(origin, destination, vclass, cookie)
			if data is False:
				# Return instead of falling through: parsing a failed
				# response would raise and log a second, misleading
				# "Unknown error" line for the same pair.
				log_error('HTTP error')
				return
			record.append(normal.findall(data)[0])

		record.append(distance.findall(data)[0])
		f.write('"' + '","'.join(record) + '"\n')
		f.flush()
	except Exception:
		# Deliberately broad: one bad pair (e.g. a page that matches no
		# pattern) must not stop the whole extraction run.
		log_error('Unknown error')
		return

# Regexes for grabbing data out of the fare page HTML
normal = re.compile(r'Normal Rates<br></font>\s*RM (\d+\.\d+)')
offpeak = re.compile(r'Off Peak Rates:<br></font> RM (\d+\.\d+)')
festive = re.compile(r'Off Peak Festive Rates:<br></font>RM (\d+\.\d+)')
distance = re.compile(r'Total Distance:<br></font>\s*(\d+(?:\.\d+)?) km')

f = open('data.csv', 'w')
log = open('log.txt', 'w')
try:
	cookie = getCookie() # Use this session ID for all requests

	# Every ordered pair of distinct codes is processed once. The total is
	# derived from the list so the progress display stays right when the
	# list changes.
	total = len(tolls) * (len(tolls) - 1)
	count = 0 # Number of pairs processed so far

	for start in xrange(0, len(tolls) - 1):
		for end in xrange(start + 1, len(tolls)):
			# Forward direction first, then the same pair swapped.
			for src, dst in ((start, end), (end, start)):
				count += 1
				# A single parenthesised argument prints the same text
				# under Python 2's print statement.
				print('Processing %s/%s(%s, %s)' % (count, total, tolls[src], tolls[dst]))
				sys.stdout.flush()
				process(src, dst, f, log)
finally:
	# Close both files even when the run is cut short by an error.
	log.close()
	f.close()
https://snipt.net/embed/55eb4040ac8aaa74bc4b27e1ea4936bb/
https://snipt.net/raw/55eb4040ac8aaa74bc4b27e1ea4936bb/
55eb4040ac8aaa74bc4b27e1ea4936bb
python
Python
118
2014-04-25T04:50:39
True
False
/api/public/snipt/8939/
extracting-toll-fares-from-httpwwwpluscommyminiqueryindexasp
<table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><a href="#L-1"> 1</a> <a href="#L-2"> 2</a> <a href="#L-3"> 3</a> <a href="#L-4"> 4</a> <a href="#L-5"> 5</a> <a href="#L-6"> 6</a> <a href="#L-7"> 7</a> <a href="#L-8"> 8</a> <a href="#L-9"> 9</a> <a href="#L-10"> 10</a> <a href="#L-11"> 11</a> <a href="#L-12"> 12</a> <a href="#L-13"> 13</a> <a href="#L-14"> 14</a> <a href="#L-15"> 15</a> <a href="#L-16"> 16</a> <a href="#L-17"> 17</a> <a href="#L-18"> 18</a> <a href="#L-19"> 19</a> <a href="#L-20"> 20</a> <a href="#L-21"> 21</a> <a href="#L-22"> 22</a> <a href="#L-23"> 23</a> <a href="#L-24"> 24</a> <a href="#L-25"> 25</a> <a href="#L-26"> 26</a> <a href="#L-27"> 27</a> <a href="#L-28"> 28</a> <a href="#L-29"> 29</a> <a href="#L-30"> 30</a> <a href="#L-31"> 31</a> <a href="#L-32"> 32</a> <a href="#L-33"> 33</a> <a href="#L-34"> 34</a> <a href="#L-35"> 35</a> <a href="#L-36"> 36</a> <a href="#L-37"> 37</a> <a href="#L-38"> 38</a> <a href="#L-39"> 39</a> <a href="#L-40"> 40</a> <a href="#L-41"> 41</a> <a href="#L-42"> 42</a> <a href="#L-43"> 43</a> <a href="#L-44"> 44</a> <a href="#L-45"> 45</a> <a href="#L-46"> 46</a> <a href="#L-47"> 47</a> <a href="#L-48"> 48</a> <a href="#L-49"> 49</a> <a href="#L-50"> 50</a> <a href="#L-51"> 51</a> <a href="#L-52"> 52</a> <a href="#L-53"> 53</a> <a href="#L-54"> 54</a> <a href="#L-55"> 55</a> <a href="#L-56"> 56</a> <a href="#L-57"> 57</a> <a href="#L-58"> 58</a> <a href="#L-59"> 59</a> <a href="#L-60"> 60</a> <a href="#L-61"> 61</a> <a href="#L-62"> 62</a> <a href="#L-63"> 63</a> <a href="#L-64"> 64</a> <a href="#L-65"> 65</a> <a href="#L-66"> 66</a> <a href="#L-67"> 67</a> <a href="#L-68"> 68</a> <a href="#L-69"> 69</a> <a href="#L-70"> 70</a> <a href="#L-71"> 71</a> <a href="#L-72"> 72</a> <a href="#L-73"> 73</a> <a href="#L-74"> 74</a> <a href="#L-75"> 75</a> <a href="#L-76"> 76</a> <a href="#L-77"> 77</a> <a href="#L-78"> 78</a> <a href="#L-79"> 79</a> <a href="#L-80"> 80</a> <a 
href="#L-81"> 81</a> <a href="#L-82"> 82</a> <a href="#L-83"> 83</a> <a href="#L-84"> 84</a> <a href="#L-85"> 85</a> <a href="#L-86"> 86</a> <a href="#L-87"> 87</a> <a href="#L-88"> 88</a> <a href="#L-89"> 89</a> <a href="#L-90"> 90</a> <a href="#L-91"> 91</a> <a href="#L-92"> 92</a> <a href="#L-93"> 93</a> <a href="#L-94"> 94</a> <a href="#L-95"> 95</a> <a href="#L-96"> 96</a> <a href="#L-97"> 97</a> <a href="#L-98"> 98</a> <a href="#L-99"> 99</a> <a href="#L-100">100</a> <a href="#L-101">101</a> <a href="#L-102">102</a> <a href="#L-103">103</a> <a href="#L-104">104</a> <a href="#L-105">105</a> <a href="#L-106">106</a> <a href="#L-107">107</a> <a href="#L-108">108</a> <a href="#L-109">109</a> <a href="#L-110">110</a> <a href="#L-111">111</a> <a href="#L-112">112</a> <a href="#L-113">113</a> <a href="#L-114">114</a> <a href="#L-115">115</a> <a href="#L-116">116</a> <a href="#L-117">117</a> <a href="#L-118">118</a></pre></div></td><td class="code"><div class="highlight"><pre><span id="L-1"><a name="L-1"></a><span class="c">#!/usr/bin/env python</span> </span><span id="L-2"><a name="L-2"></a> </span><span id="L-3"><a name="L-3"></a><span class="kn">import</span> <span class="nn">urllib</span><span class="o">,</span> <span class="nn">httplib</span><span class="o">,</span> <span class="nn">re</span><span class="o">,</span> <span class="nn">sys</span> </span><span id="L-4"><a name="L-4"></a><span class="kn">from</span> <span class="nn">time</span> <span class="kn">import</span> <span class="n">strftime</span> </span><span id="L-5"><a name="L-5"></a> </span><span id="L-6"><a name="L-6"></a><span class="c"># Sorted list</span> </span><span id="L-7"><a name="L-7"></a><span class="n">tolls</span> <span class="o">=</span> <span class="p">[</span> </span><span id="L-8"><a name="L-8"></a><span class="s">&#39;AHT&#39;</span><span class="p">,</span> <span class="s">&#39;AKH&#39;</span><span class="p">,</span> <span class="s">&#39;ASS&#39;</span><span class="p">,</span> <span 
class="s">&#39;ASU&#39;</span><span class="p">,</span> </span><span id="L-9"><a name="L-9"></a><span class="s">&#39;BBR&#39;</span><span class="p">,</span> <span class="s">&#39;BDR&#39;</span><span class="p">,</span> <span class="s">&#39;BGS&#39;</span><span class="p">,</span> <span class="s">&#39;BKB&#39;</span><span class="p">,</span> </span><span id="L-10"><a name="L-10"></a><span class="s">&#39;BKH&#39;</span><span class="p">,</span> <span class="s">&#39;BKM&#39;</span><span class="p">,</span> <span class="s">&#39;BKR&#39;</span><span class="p">,</span> <span class="s">&#39;BRG&#39;</span><span class="p">,</span> </span><span id="L-11"><a name="L-11"></a><span class="s">&#39;BRT&#39;</span><span class="p">,</span> <span class="s">&#39;BSP&#39;</span><span class="p">,</span> <span class="s">&#39;BTR&#39;</span><span class="p">,</span> <span class="s">&#39;BTS&#39;</span><span class="p">,</span> </span><span id="L-12"><a name="L-12"></a><span class="s">&#39;BTT&#39;</span><span class="p">,</span> <span class="s">&#39;BTU&#39;</span><span class="p">,</span> <span class="s">&#39;CKJ&#39;</span><span class="p">,</span> <span class="s">&#39;DMR&#39;</span><span class="p">,</span> </span><span id="L-13"><a name="L-13"></a><span class="s">&#39;EBS&#39;</span><span class="p">,</span> <span class="s">&#39;EBU&#39;</span><span class="p">,</span> <span class="s">&#39;GCE&#39;</span><span class="p">,</span> <span class="s">&#39;GPG&#39;</span><span class="p">,</span> </span><span id="L-14"><a name="L-14"></a><span class="s">&#39;GRN&#39;</span><span class="p">,</span> <span class="s">&#39;HKG&#39;</span><span class="p">,</span> <span class="s">&#39;HSB&#39;</span><span class="p">,</span> <span class="s">&#39;IPS&#39;</span><span class="p">,</span> </span><span id="L-15"><a name="L-15"></a><span class="s">&#39;JBC&#39;</span><span class="p">,</span> <span class="s">&#39;JLD&#39;</span><span class="p">,</span> <span class="s">&#39;JLP&#39;</span><span class="p">,</span> <span 
class="s">&#39;JRU&#39;</span><span class="p">,</span> </span><span id="L-16"><a name="L-16"></a><span class="s">&#39;JSN&#39;</span><span class="p">,</span> <span class="s">&#39;JTR&#39;</span><span class="p">,</span> <span class="s">&#39;JWI&#39;</span><span class="p">,</span> <span class="s">&#39;KBS&#39;</span><span class="p">,</span> </span><span id="L-17"><a name="L-17"></a><span class="s">&#39;KDR&#39;</span><span class="p">,</span> <span class="s">&#39;KJG&#39;</span><span class="p">,</span> <span class="s">&#39;KKS&#39;</span><span class="p">,</span> <span class="s">&#39;KLA&#39;</span><span class="p">,</span> </span><span id="L-18"><a name="L-18"></a><span class="s">&#39;KLI&#39;</span><span class="p">,</span> <span class="s">&#39;KPS&#39;</span><span class="p">,</span> <span class="s">&#39;LBB&#39;</span><span class="p">,</span> <span class="s">&#39;LKT&#39;</span><span class="p">,</span> </span><span id="L-19"><a name="L-19"></a><span class="s">&#39;LMK&#39;</span><span class="p">,</span> <span class="s">&#39;LNS&#39;</span><span class="p">,</span> <span class="s">&#39;MAC&#39;</span><span class="p">,</span> <span class="s">&#39;MBU&#39;</span><span class="p">,</span> </span><span id="L-20"><a name="L-20"></a><span class="s">&#39;NLI&#39;</span><span class="p">,</span> <span class="s">&#39;PDG&#39;</span><span class="p">,</span> <span class="s">&#39;PDO&#39;</span><span class="p">,</span> <span class="s">&#39;PGH&#39;</span><span class="p">,</span> </span><span id="L-21"><a name="L-21"></a><span class="s">&#39;PHT&#39;</span><span class="p">,</span> <span class="s">&#39;PLG&#39;</span><span class="p">,</span> <span class="s">&#39;PLI&#39;</span><span class="p">,</span> <span class="s">&#39;PPM&#39;</span><span class="p">,</span> </span><span id="L-22"><a name="L-22"></a><span class="s">&#39;PSR&#39;</span><span class="p">,</span> <span class="s">&#39;PTJ&#39;</span><span class="p">,</span> <span class="s">&#39;RAW&#39;</span><span class="p">,</span> 
<span class="s">&#39;SAT&#39;</span><span class="p">,</span> </span><span id="L-23"><a name="L-23"></a><span class="s">&#39;SBG&#39;</span><span class="p">,</span> <span class="s">&#39;SBI&#39;</span><span class="p">,</span> <span class="s">&#39;SBN&#39;</span><span class="p">,</span> <span class="s">&#39;SDK&#39;</span><span class="p">,</span> </span><span id="L-24"><a name="L-24"></a><span class="s">&#39;SEA&#39;</span><span class="p">,</span> <span class="s">&#39;SGB&#39;</span><span class="p">,</span> <span class="s">&#39;SGD&#39;</span><span class="p">,</span> <span class="s">&#39;SGR&#39;</span><span class="p">,</span> </span><span id="L-25"><a name="L-25"></a><span class="s">&#39;SHA&#39;</span><span class="p">,</span> <span class="s">&#39;SKD&#39;</span><span class="p">,</span> <span class="s">&#39;SKI&#39;</span><span class="p">,</span> <span class="s">&#39;SNU&#39;</span><span class="p">,</span> </span><span id="L-26"><a name="L-26"></a><span class="s">&#39;SPP&#39;</span><span class="p">,</span> <span class="s">&#39;SPR&#39;</span><span class="p">,</span> <span class="s">&#39;SPS&#39;</span><span class="p">,</span> <span class="s">&#39;SPU&#39;</span><span class="p">,</span> </span><span id="L-27"><a name="L-27"></a><span class="s">&#39;STA&#39;</span><span class="p">,</span> <span class="s">&#39;SWG&#39;</span><span class="p">,</span> <span class="s">&#39;TGK&#39;</span><span class="p">,</span> <span class="s">&#39;TGM&#39;</span><span class="p">,</span> </span><span id="L-28"><a name="L-28"></a><span class="s">&#39;TJK&#39;</span><span class="p">,</span> <span class="s">&#39;TPH&#39;</span><span class="p">,</span> <span class="s">&#39;TPU&#39;</span><span class="p">,</span> <span class="s">&#39;UPM&#39;</span><span class="p">,</span> </span><span id="L-29"><a name="L-29"></a><span class="s">&#39;USJ&#39;</span><span class="p">,</span> <span class="s">&#39;YPS&#39;</span><span class="p">,</span> <span class="s">&#39;YPU&#39;</span><span 
class="p">,</span> </span><span id="L-30"><a name="L-30"></a><span class="p">]</span> </span><span id="L-31"><a name="L-31"></a> </span><span id="L-32"><a name="L-32"></a><span class="c"># Retrieve session ID</span> </span><span id="L-33"><a name="L-33"></a><span class="k">def</span> <span class="nf">getCookie</span><span class="p">():</span> </span><span id="L-34"><a name="L-34"></a> <span class="n">http</span> <span class="o">=</span> <span class="n">httplib</span><span class="o">.</span><span class="n">HTTPConnection</span><span class="p">(</span><span class="s">&#39;www.plus.com.my&#39;</span><span class="p">,</span> <span class="mi">80</span><span class="p">)</span> </span><span id="L-35"><a name="L-35"></a> <span class="n">http</span><span class="o">.</span><span class="n">request</span><span class="p">(</span><span class="s">&#39;GET&#39;</span><span class="p">,</span> <span class="s">&#39;/index.asp&#39;</span><span class="p">)</span> </span><span id="L-36"><a name="L-36"></a> <span class="n">response</span> <span class="o">=</span> <span class="n">http</span><span class="o">.</span><span class="n">getresponse</span><span class="p">()</span> </span><span id="L-37"><a name="L-37"></a> <span class="n">cookie</span> <span class="o">=</span> <span class="n">response</span><span class="o">.</span><span class="n">getheader</span><span class="p">(</span><span class="s">&#39;set-cookie&#39;</span><span class="p">)</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s">&#39;;&#39;</span><span class="p">)[</span><span class="mi">0</span><span class="p">]</span> </span><span id="L-38"><a name="L-38"></a> <span class="n">http</span><span class="o">.</span><span class="n">close</span><span class="p">()</span> </span><span id="L-39"><a name="L-39"></a> <span class="k">return</span> <span class="n">cookie</span> </span><span id="L-40"><a name="L-40"></a> </span><span id="L-41"><a name="L-41"></a><span class="c"># Get HTML 
output</span> </span><span id="L-42"><a name="L-42"></a><span class="k">def</span> <span class="nf">getData</span><span class="p">(</span><span class="n">start</span><span class="p">,</span> <span class="n">end</span><span class="p">,</span> <span class="n">vclass</span><span class="p">,</span> <span class="n">cookie</span><span class="p">):</span> </span><span id="L-43"><a name="L-43"></a> <span class="n">headers</span> <span class="o">=</span> <span class="p">{</span> </span><span id="L-44"><a name="L-44"></a> <span class="s">&#39;Content-type&#39;</span><span class="p">:</span> <span class="s">&#39;application/x-www-form-urlencoded&#39;</span><span class="p">,</span> </span><span id="L-45"><a name="L-45"></a> <span class="s">&#39;Cookie&#39;</span> <span class="p">:</span> <span class="n">cookie</span><span class="p">,</span> </span><span id="L-46"><a name="L-46"></a> <span class="p">}</span> </span><span id="L-47"><a name="L-47"></a> </span><span id="L-48"><a name="L-48"></a> <span class="n">http</span> <span class="o">=</span> <span class="n">httplib</span><span class="o">.</span><span class="n">HTTPConnection</span><span class="p">(</span><span class="s">&#39;www.plus.com.my&#39;</span><span class="p">,</span> <span class="mi">80</span><span class="p">)</span> </span><span id="L-49"><a name="L-49"></a> <span class="n">body</span> <span class="o">=</span> <span class="n">urllib</span><span class="o">.</span><span class="n">urlencode</span><span class="p">({</span><span class="s">&#39;startloc&#39;</span> <span class="p">:</span> <span class="n">start</span><span class="p">,</span> <span class="s">&#39;endloc&#39;</span> <span class="p">:</span> <span class="n">end</span><span class="p">,</span> <span class="s">&#39;vclass&#39;</span> <span class="p">:</span> <span class="n">vclass</span><span class="p">})</span> </span><span id="L-50"><a name="L-50"></a> <span class="n">http</span><span class="o">.</span><span class="n">request</span><span 
class="p">(</span><span class="s">&#39;POST&#39;</span><span class="p">,</span> <span class="s">&#39;/miniquery/fare_details.asp&#39;</span><span class="p">,</span> <span class="n">body</span><span class="p">,</span> <span class="n">headers</span><span class="p">)</span> </span><span id="L-51"><a name="L-51"></a> <span class="n">response</span> <span class="o">=</span> <span class="n">http</span><span class="o">.</span><span class="n">getresponse</span><span class="p">()</span> </span><span id="L-52"><a name="L-52"></a> <span class="k">if</span> <span class="n">response</span><span class="o">.</span><span class="n">status</span> <span class="o">==</span> <span class="mi">200</span><span class="p">:</span> </span><span id="L-53"><a name="L-53"></a> <span class="n">data</span> <span class="o">=</span> <span class="n">response</span><span class="o">.</span><span class="n">read</span><span class="p">()</span> </span><span id="L-54"><a name="L-54"></a> <span class="n">http</span><span class="o">.</span><span class="n">close</span><span class="p">()</span> </span><span id="L-55"><a name="L-55"></a> <span class="k">return</span> <span class="n">data</span> </span><span id="L-56"><a name="L-56"></a> </span><span id="L-57"><a name="L-57"></a> <span class="n">http</span><span class="o">.</span><span class="n">close</span><span class="p">()</span> </span><span id="L-58"><a name="L-58"></a> <span class="k">return</span> <span class="bp">False</span> </span><span id="L-59"><a name="L-59"></a> </span><span id="L-60"><a name="L-60"></a><span class="k">def</span> <span class="nf">process</span><span class="p">(</span><span class="n">start</span><span class="p">,</span> <span class="n">end</span><span class="p">,</span> <span class="n">f</span><span class="p">,</span> <span class="n">log</span><span class="p">):</span> </span><span id="L-61"><a name="L-61"></a> <span class="k">try</span><span class="p">:</span> </span><span id="L-62"><a name="L-62"></a> <span 
class="n">record</span> <span class="o">=</span> <span class="p">[</span><span class="n">tolls</span><span class="p">[</span><span class="n">start</span><span class="p">],</span> <span class="n">tolls</span><span class="p">[</span><span class="n">end</span><span class="p">]]</span> </span><span id="L-63"><a name="L-63"></a> <span class="c"># Class 1</span> </span><span id="L-64"><a name="L-64"></a> <span class="n">data</span> <span class="o">=</span> <span class="n">getData</span><span class="p">(</span><span class="n">tolls</span><span class="p">[</span><span class="n">start</span><span class="p">],</span> <span class="n">tolls</span><span class="p">[</span><span class="n">end</span><span class="p">],</span> <span class="mi">1</span><span class="p">,</span> <span class="n">cookie</span><span class="p">)</span> </span><span id="L-65"><a name="L-65"></a> </span><span id="L-66"><a name="L-66"></a> <span class="c"># Err check</span> </span><span id="L-67"><a name="L-67"></a> <span class="k">if</span> <span class="n">data</span> <span class="ow">is</span> <span class="bp">False</span><span class="p">:</span> </span><span id="L-68"><a name="L-68"></a> <span class="n">log</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="s">&#39;</span><span class="si">%s</span><span class="s"> HTTP error (</span><span class="si">%s</span><span class="s">, </span><span class="si">%s</span><span class="s">)</span><span class="se">\n</span><span class="s">&#39;</span> <span class="o">%</span> <span class="p">(</span><span class="n">strftime</span><span class="p">(</span><span class="s">&#39;%Y-%m-</span><span class="si">%d</span><span class="s"> %I:%M:%S&#39;</span><span class="p">),</span> <span class="n">tolls</span><span class="p">[</span><span class="n">start</span><span class="p">],</span> <span class="n">tolls</span><span class="p">[</span><span class="n">end</span><span class="p">]))</span> </span><span id="L-69"><a name="L-69"></a> <span 
class="n">log</span><span class="o">.</span><span class="n">flush</span><span class="p">()</span> </span><span id="L-70"><a name="L-70"></a> <span class="k">return</span> </span><span id="L-71"><a name="L-71"></a> </span><span id="L-72"><a name="L-72"></a> <span class="n">record</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">normal</span><span class="o">.</span><span class="n">findall</span><span class="p">(</span><span class="n">data</span><span class="p">)[</span><span class="mi">0</span><span class="p">])</span> </span><span id="L-73"><a name="L-73"></a> <span class="n">record</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">offpeak</span><span class="o">.</span><span class="n">findall</span><span class="p">(</span><span class="n">data</span><span class="p">)[</span><span class="mi">0</span><span class="p">])</span> </span><span id="L-74"><a name="L-74"></a> <span class="n">record</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">festive</span><span class="o">.</span><span class="n">findall</span><span class="p">(</span><span class="n">data</span><span class="p">)[</span><span class="mi">0</span><span class="p">])</span> </span><span id="L-75"><a name="L-75"></a> </span><span id="L-76"><a name="L-76"></a> <span class="c"># Class 2 ~ 5</span> </span><span id="L-77"><a name="L-77"></a> <span class="k">for</span> <span class="n">vclass</span> <span class="ow">in</span> <span class="nb">xrange</span><span class="p">(</span><span class="mi">2</span><span class="p">,</span> <span class="mi">6</span><span class="p">):</span> </span><span id="L-78"><a name="L-78"></a> <span class="n">data</span> <span class="o">=</span> <span class="n">getData</span><span class="p">(</span><span class="n">tolls</span><span class="p">[</span><span class="n">start</span><span class="p">],</span> <span class="n">tolls</span><span 
class="p">[</span><span class="n">end</span><span class="p">],</span> <span class="n">vclass</span><span class="p">,</span> <span class="n">cookie</span><span class="p">)</span> </span><span id="L-79"><a name="L-79"></a> <span class="k">if</span> <span class="n">data</span> <span class="o">==</span> <span class="bp">False</span><span class="p">:</span> </span><span id="L-80"><a name="L-80"></a> <span class="n">log</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="s">&#39;</span><span class="si">%s</span><span class="s"> HTTP error (</span><span class="si">%s</span><span class="s">, </span><span class="si">%s</span><span class="s">)</span><span class="se">\n</span><span class="s">&#39;</span> <span class="o">%</span> <span class="p">(</span><span class="n">strftime</span><span class="p">(</span><span class="s">&#39;%Y-%m-</span><span class="si">%d</span><span class="s"> %I:%M:%S&#39;</span><span class="p">),</span> <span class="n">tolls</span><span class="p">[</span><span class="n">start</span><span class="p">],</span> <span class="n">tolls</span><span class="p">[</span><span class="n">end</span><span class="p">]))</span> </span><span id="L-81"><a name="L-81"></a> <span class="k">break</span> </span><span id="L-82"><a name="L-82"></a> </span><span id="L-83"><a name="L-83"></a> <span class="n">record</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">normal</span><span class="o">.</span><span class="n">findall</span><span class="p">(</span><span class="n">data</span><span class="p">)[</span><span class="mi">0</span><span class="p">])</span> </span><span id="L-84"><a name="L-84"></a> </span><span id="L-85"><a name="L-85"></a> <span class="n">record</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">distance</span><span class="o">.</span><span class="n">findall</span><span class="p">(</span><span class="n">data</span><span 
class="p">)[</span><span class="mi">0</span><span class="p">])</span> </span><span id="L-86"><a name="L-86"></a> <span class="n">record</span> <span class="o">=</span> <span class="s">&#39;&quot;&#39;</span> <span class="o">+</span> <span class="p">(</span><span class="s">&#39;&quot;,&quot;&#39;</span><span class="p">)</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">record</span><span class="p">)</span> <span class="o">+</span> <span class="s">&#39;&quot;&#39;</span> </span><span id="L-87"><a name="L-87"></a> <span class="n">f</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="n">record</span> <span class="o">+</span> <span class="s">&#39;</span><span class="se">\n</span><span class="s">&#39;</span><span class="p">)</span> </span><span id="L-88"><a name="L-88"></a> <span class="n">f</span><span class="o">.</span><span class="n">flush</span><span class="p">()</span> </span><span id="L-89"><a name="L-89"></a> <span class="k">except</span> <span class="ne">Exception</span><span class="p">:</span> </span><span id="L-90"><a name="L-90"></a> <span class="n">log</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="s">&#39;</span><span class="si">%s</span><span class="s"> Unknown error (</span><span class="si">%s</span><span class="s">, </span><span class="si">%s</span><span class="s">)</span><span class="se">\n</span><span class="s">&#39;</span> <span class="o">%</span> <span class="p">(</span><span class="n">strftime</span><span class="p">(</span><span class="s">&#39;%Y-%m-</span><span class="si">%d</span><span class="s"> %I:%M:%S&#39;</span><span class="p">),</span> <span class="n">tolls</span><span class="p">[</span><span class="n">start</span><span class="p">],</span> <span class="n">tolls</span><span class="p">[</span><span class="n">end</span><span class="p">]))</span> </span><span id="L-91"><a name="L-91"></a> <span 
class="n">log</span><span class="o">.</span><span class="n">flush</span><span class="p">()</span> </span><span id="L-92"><a name="L-92"></a> <span class="k">return</span> <span class="c"># Make sure extraction continues</span> </span><span id="L-93"><a name="L-93"></a> </span><span id="L-94"><a name="L-94"></a><span class="c"># Regrex for grabbing data</span> </span><span id="L-95"><a name="L-95"></a><span class="n">normal</span> <span class="o">=</span> <span class="n">re</span><span class="o">.</span><span class="n">compile</span><span class="p">(</span><span class="s">r&#39;Normal Rates&lt;br&gt;&lt;/font&gt;\s*RM (\d+\.\d+)&#39;</span><span class="p">)</span> </span><span id="L-96"><a name="L-96"></a><span class="n">offpeak</span> <span class="o">=</span> <span class="n">re</span><span class="o">.</span><span class="n">compile</span><span class="p">(</span><span class="s">r&#39;Off Peak Rates:&lt;br&gt;&lt;/font&gt; RM (\d+\.\d+)&#39;</span><span class="p">)</span> </span><span id="L-97"><a name="L-97"></a><span class="n">festive</span> <span class="o">=</span> <span class="n">re</span><span class="o">.</span><span class="n">compile</span><span class="p">(</span><span class="s">r&#39;Off Peak Festive Rates:&lt;br&gt;&lt;/font&gt;RM (\d+\.\d+)&#39;</span><span class="p">)</span> </span><span id="L-98"><a name="L-98"></a><span class="n">distance</span> <span class="o">=</span> <span class="n">re</span><span class="o">.</span><span class="n">compile</span><span class="p">(</span><span class="s">r&#39;Total Distance:&lt;br&gt;&lt;/font&gt;\s*(\d+(?:\.\d+)?) 
km&#39;</span><span class="p">)</span> </span><span id="L-99"><a name="L-99"></a> </span><span id="L-100"><a name="L-100"></a><span class="n">f</span> <span class="o">=</span> <span class="nb">open</span><span class="p">(</span><span class="s">&#39;data.csv&#39;</span><span class="p">,</span> <span class="s">&#39;w&#39;</span><span class="p">)</span> </span><span id="L-101"><a name="L-101"></a><span class="n">log</span> <span class="o">=</span> <span class="nb">open</span><span class="p">(</span><span class="s">&#39;log.txt&#39;</span><span class="p">,</span> <span class="s">&#39;w&#39;</span><span class="p">)</span> </span><span id="L-102"><a name="L-102"></a> </span><span id="L-103"><a name="L-103"></a><span class="n">cookie</span> <span class="o">=</span> <span class="n">getCookie</span><span class="p">()</span> <span class="c"># Use this session ID for all request</span> </span><span id="L-104"><a name="L-104"></a><span class="n">count</span> <span class="o">=</span> <span class="mi">0</span> <span class="c"># Count num of request made</span> </span><span id="L-105"><a name="L-105"></a><span class="k">for</span> <span class="n">start</span> <span class="ow">in</span> <span class="nb">xrange</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="nb">len</span><span class="p">(</span><span class="n">tolls</span><span class="p">)</span> <span class="o">-</span> <span class="mi">1</span><span class="p">):</span> </span><span id="L-106"><a name="L-106"></a> <span class="k">for</span> <span class="n">end</span> <span class="ow">in</span> <span class="nb">xrange</span><span class="p">(</span><span class="n">start</span> <span class="o">+</span> <span class="mi">1</span><span class="p">,</span> <span class="nb">len</span><span class="p">(</span><span class="n">tolls</span><span class="p">)):</span> </span><span id="L-107"><a name="L-107"></a> <span class="n">count</span> <span class="o">+=</span> <span class="mi">1</span> 
</span><span id="L-108"><a name="L-108"></a> <span class="k">print</span> <span class="s">&#39;Processing </span><span class="si">%s</span><span class="s">/7656(</span><span class="si">%s</span><span class="s">, </span><span class="si">%s</span><span class="s">)&#39;</span> <span class="o">%</span> <span class="p">(</span><span class="n">count</span><span class="p">,</span> <span class="n">tolls</span><span class="p">[</span><span class="n">start</span><span class="p">],</span> <span class="n">tolls</span><span class="p">[</span><span class="n">end</span><span class="p">])</span> </span><span id="L-109"><a name="L-109"></a> <span class="n">sys</span><span class="o">.</span><span class="n">stdout</span><span class="o">.</span><span class="n">flush</span><span class="p">()</span> </span><span id="L-110"><a name="L-110"></a> <span class="n">process</span><span class="p">(</span><span class="n">start</span><span class="p">,</span> <span class="n">end</span><span class="p">,</span> <span class="n">f</span><span class="p">,</span> <span class="n">log</span><span class="p">)</span> </span><span id="L-111"><a name="L-111"></a> </span><span id="L-112"><a name="L-112"></a> <span class="c"># Swap start and end points</span> </span><span id="L-113"><a name="L-113"></a> <span class="n">count</span> <span class="o">+=</span> <span class="mi">1</span> </span><span id="L-114"><a name="L-114"></a> <span class="k">print</span> <span class="s">&#39;Processing </span><span class="si">%s</span><span class="s">/7656(</span><span class="si">%s</span><span class="s">, </span><span class="si">%s</span><span class="s">)&#39;</span> <span class="o">%</span> <span class="p">(</span><span class="n">count</span><span class="p">,</span> <span class="n">tolls</span><span class="p">[</span><span class="n">end</span><span class="p">],</span> <span class="n">tolls</span><span class="p">[</span><span class="n">start</span><span class="p">])</span> </span><span id="L-115"><a name="L-115"></a> <span 
class="n">sys</span><span class="o">.</span><span class="n">stdout</span><span class="o">.</span><span class="n">flush</span><span class="p">()</span> </span><span id="L-116"><a name="L-116"></a> <span class="n">process</span><span class="p">(</span><span class="n">end</span><span class="p">,</span> <span class="n">start</span><span class="p">,</span> <span class="n">f</span><span class="p">,</span> <span class="n">log</span><span class="p">)</span> </span><span id="L-117"><a name="L-117"></a><span class="n">log</span><span class="o">.</span><span class="n">close</span><span class="p">()</span> </span><span id="L-118"><a name="L-118"></a><span class="n">f</span><span class="o">.</span><span class="n">close</span><span class="p">()</span> </span></pre></div> </td></tr></table>
"data extraction", "north south highway", "python. malaysia", http, plus, regrex, toll