There are several situations in which we need to extract the content of a web page and display it in our application. In such cases we can use the BeautifulSoup and Mechanize Python packages.
The Python code to extract the prices of gold and silver from the website http://www.fenegosida.org/ is shown here:
"""Reap gold and silver prices from http://www.fenegosida.org/

The prices are held in <h1> tags. Each <h1> sits inside a <div> whose id is
one of the names in IDS followed by "-content".

Sample data:
{'tejabi-1tola': u'53450', 'hallmark-1tola': u'53700', 'hallmark-10gms': u'46040', 'silver-1tola': u'860', 'tejabi-10gms': u'45825', 'silver-10gms': u'737.50'}

Usage: pass "-sample" as the first command-line argument to print a canned
sample and exit without fetching the page.
"""
import sys

from BeautifulSoup import BeautifulSoup
from mechanize import Browser

URL = "http://www.fenegosida.org/"
IDS = ["hallmark", "tejabi", "silver"]

# Offline mode: emit a fixed sample and stop before any network access.
if len(sys.argv) > 1 and sys.argv[1] == "-sample":
    print("{'tejabi-1tola_new': u'53450', 'hallmark-1tola': u'53700', 'hallmark-10gms': u'46040', 'silver-1tola': u'860', 'tejabi-10gms': u'45825', 'silver-10gms': u'737.50'}")
    sys.exit(0)

# Send browser-like request headers along with the fetch.
br = Browser()
br.addheaders = [
    ('user-agent', 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.2.3) Gecko/20100423 Ubuntu/10.04 (lucid) Firefox/3.6.3',),
    ('accept', 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',),
]
page = br.open(URL)
soup = BeautifulSoup(page.read())

price = {}
for metal in IDS:
    # The first two <div id="<metal>-content"> elements are read; fewer than
    # two matches raises IndexError below.
    blocks = soup.findAll('div', {'id': "{0}-content".format(metal)})
    low = blocks[0].h1.text
    high = blocks[1].h1.text
    # Order the pair numerically: the smaller value is stored as the 10 gms
    # price and the larger as the 1 tola price.
    if float(low) > float(high):
        low, high = high, low
    price['{0}-10gms'.format(metal)] = low
    price['{0}-1tola'.format(metal)] = high

# Single-argument print(...) produces the same output as the print statement
# on Python 2 and is also valid Python 3 syntax.
print(price)
sys.exit(0)
The Python code to extract the prices of gold and silver from the website http://www.fenegosida.org/ is shown here:
"""Reap gold and silver prices from http://www.fenegosida.org/

The prices are held in <h1> tags. Each <h1> sits inside a <div> whose id is
one of the names in IDS followed by "-content".

Sample data:
{'tejabi-1tola': u'53450', 'hallmark-1tola': u'53700', 'hallmark-10gms': u'46040', 'silver-1tola': u'860', 'tejabi-10gms': u'45825', 'silver-10gms': u'737.50'}

Usage: pass "-sample" as the first command-line argument to print a canned
sample and exit without fetching the page.
"""
import sys

from BeautifulSoup import BeautifulSoup
from mechanize import Browser

URL = "http://www.fenegosida.org/"
IDS = ["hallmark", "tejabi", "silver"]

# Offline mode: emit a fixed sample and stop before any network access.
if len(sys.argv) > 1 and sys.argv[1] == "-sample":
    print("{'tejabi-1tola_new': u'53450', 'hallmark-1tola': u'53700', 'hallmark-10gms': u'46040', 'silver-1tola': u'860', 'tejabi-10gms': u'45825', 'silver-10gms': u'737.50'}")
    sys.exit(0)

# Send browser-like request headers along with the fetch.
br = Browser()
br.addheaders = [
    ('user-agent', 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.2.3) Gecko/20100423 Ubuntu/10.04 (lucid) Firefox/3.6.3',),
    ('accept', 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',),
]
page = br.open(URL)
soup = BeautifulSoup(page.read())

price = {}
for metal in IDS:
    # The first two <div id="<metal>-content"> elements are read; fewer than
    # two matches raises IndexError below.
    blocks = soup.findAll('div', {'id': "{0}-content".format(metal)})
    low = blocks[0].h1.text
    high = blocks[1].h1.text
    # Order the pair numerically: the smaller value is stored as the 10 gms
    # price and the larger as the 1 tola price.
    if float(low) > float(high):
        low, high = high, low
    price['{0}-10gms'.format(metal)] = low
    price['{0}-1tola'.format(metal)] = high

# Single-argument print(...) produces the same output as the print statement
# on Python 2 and is also valid Python 3 syntax.
print(price)
sys.exit(0)
No comments:
Post a Comment