<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0"
	xmlns:content="http://purl.org/rss/1.0/modules/content/"
	xmlns:wfw="http://wellformedweb.org/CommentAPI/"
	xmlns:dc="http://purl.org/dc/elements/1.1/"
	xmlns:atom="http://www.w3.org/2005/Atom"
	xmlns:sy="http://purl.org/rss/1.0/modules/syndication/"
	xmlns:slash="http://purl.org/rss/1.0/modules/slash/"
	>

<channel>
	<title>Aron Barbosa</title>
	<atom:link href="http://www.aronbarbosa.com/feed/" rel="self" type="application/rss+xml" />
	<link>http://www.aronbarbosa.com</link>
	<description>a web developer from the PH</description>
	<lastBuildDate>Fri, 17 May 2013 01:07:57 +0000</lastBuildDate>
	<language>en-US</language>
	<sy:updatePeriod>hourly</sy:updatePeriod>
	<sy:updateFrequency>1</sy:updateFrequency>
	<generator>http://wordpress.org/?v=3.5.1</generator>
		<item>
		<title>Starting Web Scraping with Python and BeautifulSoup 4</title>
		<link>http://www.aronbarbosa.com/starting-web-scraping-with-python-and-beautifulsoup-4/</link>
		<comments>http://www.aronbarbosa.com/starting-web-scraping-with-python-and-beautifulsoup-4/#comments</comments>
		<pubDate>Fri, 17 May 2013 01:07:57 +0000</pubDate>
		<dc:creator>lucentx</dc:creator>
				<category><![CDATA[Uncategorized]]></category>
		<category><![CDATA[beautifulsoup]]></category>
		<category><![CDATA[manga]]></category>
		<category><![CDATA[python]]></category>
		<category><![CDATA[scraping]]></category>

		<guid isPermaLink="false">http://www.aronbarbosa.com/?p=188</guid>
		<description><![CDATA[<p>I have been wanting to learn Python for some time now, so I&#8217;ve tried to create a small Python script that downloads the latest manga from my favorite manga site, Mangastream. I used BeautifulSoup 4, a Python library for pulling<span class="ellipsis">&#8230;</span><div class="read-more"><a href="http://www.aronbarbosa.com/starting-web-scraping-with-python-and-beautifulsoup-4/">Read more &#8250;</a></div><!-- end of .read-more --></p><p>The post <a href="http://www.aronbarbosa.com/starting-web-scraping-with-python-and-beautifulsoup-4/">Starting Web Scraping with Python and BeautifulSoup 4</a> appeared first on <a href="http://www.aronbarbosa.com">Aron Barbosa</a>.</p>]]></description>
				<content:encoded><![CDATA[<p>I have been wanting to learn Python for some time now, so I&#8217;ve tried to create a small Python script that downloads the latest manga from my favorite manga site, <a href="http://mangastream.com">Mangastream</a>. I used <a href="http://www.crummy.com/software/BeautifulSoup/" title="BeautifulSoup 4" target="_blank">BeautifulSoup 4</a>, a Python library for pulling data out of HTML and XML files. Here&#8217;s the not-so-lovely code <img src='http://www.aronbarbosa.com/wp-includes/images/smilies/icon_razz.gif' alt=':P' class='wp-smiley' />  :</p>
<pre class="prettyprint">
<code class="language-python">
from os       import makedirs
from bs4      import BeautifulSoup
from urllib   import urlretrieve
from urllib2  import urlopen
from os.path  import exists, isfile

class Crawler:
  """Scrape a front page for newly listed manga and download each release.

  The page at `url` is fetched and parsed when the object is created; call
  go() to download every release the page marks as new.
  """

  def __init__(self, url):
    """Fetch `url` and parse the response body with BeautifulSoup.

    The raw response is kept in self.main_html and the parsed document in
    self.main_soup.
    """
    self.url = url
    self.main_html = urlopen(url)
    self.main_soup = BeautifulSoup(self.main_html.read())

  def getManga(self):
    """Process the fresh-manga lists if the front page answered with HTTP 200.

    Any other status code is ignored and nothing is downloaded.
    """
    if self.main_html.code == 200:
      self.getFreshMangaList()

  def getFreshMangaList(self):
    """Find every ul.freshmanga list and download its li.new entries.

    self.mangaList is overwritten on each pass, so afterwards it holds the
    entries of the last list that was found.
    """
    for lists in self.main_soup.find_all('ul', {'class' : 'freshmanga'}):
      self.mangaList = lists.find_all('li', {'class' : 'new'})
      self.getFreshMangaListUrl(self.mangaList)

  def getFreshMangaListUrl(self, lists):
    """Download the release linked from each entry in `lists`.

    Every entry must offer find('a'); the anchor's href is handed to
    ImageDownloader. Entries without an anchor, or whose anchor has no href,
    are skipped so that one malformed list item does not abort the whole run.
    """
    for link in lists:
      anchor = link.find('a')
      if anchor is None:
        # find() returns None when the entry holds no anchor at all.
        continue
      mangaUrl = anchor.get('href')
      if not mangaUrl:
        # get() returns None for a missing attribute; nothing to follow.
        continue
      d = ImageDownloader(mangaUrl)
      d.download()

  def go(self):
    """Entry point: run the whole crawl (delegates to getManga)."""
    self.getManga()

class ImageDownloader:
  """Walk a release page by page, starting at `url`, and save each page image.

  Images are written below 'downloads/', in a directory named after the
  third-last and second-last path segments of the page URL.
  """

  def __init__(self, url):
    """Remember the first page URL and the site root used for next links."""
    self.url = url
    self.home_url = 'http://mangastream.com'

  def download(self):
    """Download every page image, following the "next" links.

    The walk ends when the last path segment of the current URL is 'end',
    or when the current page offers no next link. self.url is advanced to
    each page as it is visited. Every page is fetched and parsed once; the
    same parsed document serves the image lookup and the next-link lookup.
    """
    url_split = self.url.split('/')
    while url_split[-1] != 'end':

      print(self.url)

      dir_name = url_split[-3] + '_' + url_split[-2]

      html_soup = self._fetchSoup(self.url)
      self.downloadImage(self.url, dir_name, url_split[-1], html_soup)

      next_href = self._findNextHref(html_soup)
      if not next_href:
        # Without a next link the URL would never change and this loop
        # would fetch the same page forever, so stop here instead.
        break

      self.url = self.home_url + next_href
      url_split = self.url.split('/')

  def downloadImage(self, url, dir_name, cnt, soup=None):
    """Save the image found on one page.

    url      -- address of the page; fetched only when `soup` is not given.
    dir_name -- directory below 'downloads/' and prefix of the file name.
    cnt      -- page identifier (a string) appended to the file name.
    soup     -- optional, already parsed document for `url`.

    The file is named dir_name + '_' + cnt + '.' + the extension taken from
    the image URL. An existing file is left alone. A failed transfer is
    reported and otherwise ignored, so the remaining pages still get a chance.
    """
    dir_path = 'downloads/' + dir_name
    if soup is None:
      soup = self._fetchSoup(url)

    img = soup.find('img', id='p')
    img_src = None
    if img is not None:
      img_src = img.get('src')
    if not img_src:
      # No img with id "p" (or no src on it): nothing to download here.
      print('No image found on ' + url)
      return

    file_extension = img_src.split('/')[-1].split('.')[-1]
    file_name = dir_name + '_' + cnt + '.' + file_extension
    file_path = dir_path + '/' + file_name

    if not exists(dir_path):
      makedirs(dir_path)

    if isfile(file_path):
      print('Image already exists.')
      return

    # Announce the transfer before it starts so the message reads as progress.
    print('Downloading ' + file_name + '...')
    try:
      urlretrieve(img_src, file_path)
    except Exception:
      # Best effort: report and carry on. Catching Exception rather than
      # everything lets Ctrl-C and interpreter exit pass through.
      print('Download error!')

  def _fetchSoup(self, url):
    """Fetch `url`, parse the body with BeautifulSoup and close the response."""
    response = urlopen(url)
    try:
      return BeautifulSoup(response.read())
    finally:
      response.close()

  def _findNextHref(self, soup):
    """Return the href of the link after the active one in the page controls.

    Every element with id "controls" is inspected and the last usable one
    wins. Returns None when no such link exists.
    """
    next_href = None
    for controls in soup.find_all(id="controls"):
      active = controls.find('a', {'class' : 'active'})
      if active is None:
        continue
      next_link = active.find_next_sibling('a')
      if next_link is None:
        continue
      href = next_link.get('href')
      if href:
        next_href = href
    return next_href

def main():
  """Crawl the Mangastream front page and download everything listed as new."""
  Crawler('http://mangastream.com').go()

# Start the crawl only when this file is executed as a script.
if __name__ == '__main__':
  main()
</code>
</pre>
<p>You can fork the project on <a href="https://github.com/lucent-x/Mangastream-Downloader" title="Mangastream Downloader" target="_blank">GitHub</a>.</p>
<p>The post <a href="http://www.aronbarbosa.com/starting-web-scraping-with-python-and-beautifulsoup-4/">Starting Web Scraping with Python and BeautifulSoup 4</a> appeared first on <a href="http://www.aronbarbosa.com">Aron Barbosa</a>.</p>]]></content:encoded>
			<wfw:commentRss>http://www.aronbarbosa.com/starting-web-scraping-with-python-and-beautifulsoup-4/feed/</wfw:commentRss>
		<slash:comments>0</slash:comments>
		</item>
		<item>
		<title>Legit Manila</title>
		<link>http://www.aronbarbosa.com/portfolio/legit-manila/</link>
		<comments>http://www.aronbarbosa.com/portfolio/legit-manila/#comments</comments>
		<pubDate>Wed, 13 Mar 2013 00:19:30 +0000</pubDate>
		<dc:creator>lucentx</dc:creator>
				<category><![CDATA[CSS]]></category>
		<category><![CDATA[HTML]]></category>
		<category><![CDATA[jQuery]]></category>
		<category><![CDATA[php]]></category>
		<category><![CDATA[wordpress]]></category>

		<guid isPermaLink="false">http://aronbarbosa.com/?post_type=portfolio&#038;p=110</guid>
		<description><![CDATA[<p></p><p>The post <a href="http://www.aronbarbosa.com/portfolio/legit-manila/">Legit Manila</a> appeared first on <a href="http://www.aronbarbosa.com">Aron Barbosa</a>.</p>]]></description>
				<content:encoded><![CDATA[<p>The post <a href="http://www.aronbarbosa.com/portfolio/legit-manila/">Legit Manila</a> appeared first on <a href="http://www.aronbarbosa.com">Aron Barbosa</a>.</p>]]></content:encoded>
			<wfw:commentRss>http://www.aronbarbosa.com/portfolio/legit-manila/feed/</wfw:commentRss>
		<slash:comments>0</slash:comments>
		</item>
		<item>
		<title>eServe BPO</title>
		<link>http://www.aronbarbosa.com/portfolio/eserve-bpo/</link>
		<comments>http://www.aronbarbosa.com/portfolio/eserve-bpo/#comments</comments>
		<pubDate>Wed, 13 Mar 2013 00:17:54 +0000</pubDate>
		<dc:creator>lucentx</dc:creator>
				<category><![CDATA[CSS]]></category>
		<category><![CDATA[HTML]]></category>
		<category><![CDATA[jQuery]]></category>
		<category><![CDATA[php]]></category>
		<category><![CDATA[wordpress]]></category>

		<guid isPermaLink="false">http://aronbarbosa.com/?post_type=portfolio&#038;p=108</guid>
		<description><![CDATA[<p></p><p>The post <a href="http://www.aronbarbosa.com/portfolio/eserve-bpo/">eServe BPO</a> appeared first on <a href="http://www.aronbarbosa.com">Aron Barbosa</a>.</p>]]></description>
				<content:encoded><![CDATA[<p>The post <a href="http://www.aronbarbosa.com/portfolio/eserve-bpo/">eServe BPO</a> appeared first on <a href="http://www.aronbarbosa.com">Aron Barbosa</a>.</p>]]></content:encoded>
			<wfw:commentRss>http://www.aronbarbosa.com/portfolio/eserve-bpo/feed/</wfw:commentRss>
		<slash:comments>0</slash:comments>
		</item>
		<item>
		<title>YoloStrips</title>
		<link>http://www.aronbarbosa.com/portfolio/yolostrips/</link>
		<comments>http://www.aronbarbosa.com/portfolio/yolostrips/#comments</comments>
		<pubDate>Wed, 13 Mar 2013 00:16:57 +0000</pubDate>
		<dc:creator>lucentx</dc:creator>
				<category><![CDATA[CSS]]></category>
		<category><![CDATA[HTML]]></category>
		<category><![CDATA[jQuery]]></category>
		<category><![CDATA[php]]></category>
		<category><![CDATA[wordpress]]></category>

		<guid isPermaLink="false">http://aronbarbosa.com/?post_type=portfolio&#038;p=106</guid>
		<description><![CDATA[<p></p><p>The post <a href="http://www.aronbarbosa.com/portfolio/yolostrips/">YoloStrips</a> appeared first on <a href="http://www.aronbarbosa.com">Aron Barbosa</a>.</p>]]></description>
				<content:encoded><![CDATA[<p>The post <a href="http://www.aronbarbosa.com/portfolio/yolostrips/">YoloStrips</a> appeared first on <a href="http://www.aronbarbosa.com">Aron Barbosa</a>.</p>]]></content:encoded>
			<wfw:commentRss>http://www.aronbarbosa.com/portfolio/yolostrips/feed/</wfw:commentRss>
		<slash:comments>0</slash:comments>
		</item>
		<item>
		<title>Vintage Redevelopment</title>
		<link>http://www.aronbarbosa.com/portfolio/vintage-redevelopment/</link>
		<comments>http://www.aronbarbosa.com/portfolio/vintage-redevelopment/#comments</comments>
		<pubDate>Wed, 13 Mar 2013 00:15:31 +0000</pubDate>
		<dc:creator>lucentx</dc:creator>
				<category><![CDATA[CSS]]></category>
		<category><![CDATA[drupal]]></category>
		<category><![CDATA[HTML]]></category>
		<category><![CDATA[jQuery]]></category>
		<category><![CDATA[php]]></category>

		<guid isPermaLink="false">http://www.aronbarbosa.com/?post_type=portfolio&#038;p=155</guid>
		<description><![CDATA[<p></p><p>The post <a href="http://www.aronbarbosa.com/portfolio/vintage-redevelopment/">Vintage Redevelopment</a> appeared first on <a href="http://www.aronbarbosa.com">Aron Barbosa</a>.</p>]]></description>
				<content:encoded><![CDATA[<p>The post <a href="http://www.aronbarbosa.com/portfolio/vintage-redevelopment/">Vintage Redevelopment</a> appeared first on <a href="http://www.aronbarbosa.com">Aron Barbosa</a>.</p>]]></content:encoded>
			<wfw:commentRss>http://www.aronbarbosa.com/portfolio/vintage-redevelopment/feed/</wfw:commentRss>
		<slash:comments>0</slash:comments>
		</item>
		<item>
		<title>AzChia-ChiaBia Pilipinas</title>
		<link>http://www.aronbarbosa.com/portfolio/azchia-chiabia-pilipinas/</link>
		<comments>http://www.aronbarbosa.com/portfolio/azchia-chiabia-pilipinas/#comments</comments>
		<pubDate>Wed, 13 Mar 2013 00:14:59 +0000</pubDate>
		<dc:creator>lucentx</dc:creator>
				<category><![CDATA[cakephp]]></category>
		<category><![CDATA[CSS]]></category>
		<category><![CDATA[HTML]]></category>
		<category><![CDATA[jQuery]]></category>

		<guid isPermaLink="false">http://www.aronbarbosa.com/?post_type=portfolio&#038;p=156</guid>
		<description><![CDATA[<p></p><p>The post <a href="http://www.aronbarbosa.com/portfolio/azchia-chiabia-pilipinas/">AzChia-ChiaBia Pilipinas</a> appeared first on <a href="http://www.aronbarbosa.com">Aron Barbosa</a>.</p>]]></description>
				<content:encoded><![CDATA[<p>The post <a href="http://www.aronbarbosa.com/portfolio/azchia-chiabia-pilipinas/">AzChia-ChiaBia Pilipinas</a> appeared first on <a href="http://www.aronbarbosa.com">Aron Barbosa</a>.</p>]]></content:encoded>
			<wfw:commentRss>http://www.aronbarbosa.com/portfolio/azchia-chiabia-pilipinas/feed/</wfw:commentRss>
		<slash:comments>0</slash:comments>
		</item>
		<item>
		<title>Zombie Apocalypse Survival Guide</title>
		<link>http://www.aronbarbosa.com/portfolio/zombie-apocalypse-survival-guide/</link>
		<comments>http://www.aronbarbosa.com/portfolio/zombie-apocalypse-survival-guide/#comments</comments>
		<pubDate>Wed, 13 Mar 2013 00:14:55 +0000</pubDate>
		<dc:creator>lucentx</dc:creator>
				<category><![CDATA[CSS]]></category>
		<category><![CDATA[HTML]]></category>
		<category><![CDATA[jQuery]]></category>
		<category><![CDATA[php]]></category>
		<category><![CDATA[wordpress]]></category>

		<guid isPermaLink="false">http://aronbarbosa.com/?post_type=portfolio&#038;p=104</guid>
		<description><![CDATA[<p></p><p>The post <a href="http://www.aronbarbosa.com/portfolio/zombie-apocalypse-survival-guide/">Zombie Apocalypse Survival Guide</a> appeared first on <a href="http://www.aronbarbosa.com">Aron Barbosa</a>.</p>]]></description>
				<content:encoded><![CDATA[<p>The post <a href="http://www.aronbarbosa.com/portfolio/zombie-apocalypse-survival-guide/">Zombie Apocalypse Survival Guide</a> appeared first on <a href="http://www.aronbarbosa.com">Aron Barbosa</a>.</p>]]></content:encoded>
			<wfw:commentRss>http://www.aronbarbosa.com/portfolio/zombie-apocalypse-survival-guide/feed/</wfw:commentRss>
		<slash:comments>0</slash:comments>
		</item>
		<item>
		<title>Oasis Counselling</title>
		<link>http://www.aronbarbosa.com/portfolio/oasis-counselling/</link>
		<comments>http://www.aronbarbosa.com/portfolio/oasis-counselling/#comments</comments>
		<pubDate>Wed, 13 Mar 2013 00:12:11 +0000</pubDate>
		<dc:creator>lucentx</dc:creator>
				<category><![CDATA[CSS]]></category>
		<category><![CDATA[HTML]]></category>
		<category><![CDATA[jQuery]]></category>
		<category><![CDATA[php]]></category>
		<category><![CDATA[wordpress]]></category>

		<guid isPermaLink="false">http://aronbarbosa.com/?post_type=portfolio&#038;p=102</guid>
		<description><![CDATA[<p></p><p>The post <a href="http://www.aronbarbosa.com/portfolio/oasis-counselling/">Oasis Counselling</a> appeared first on <a href="http://www.aronbarbosa.com">Aron Barbosa</a>.</p>]]></description>
				<content:encoded><![CDATA[<p>The post <a href="http://www.aronbarbosa.com/portfolio/oasis-counselling/">Oasis Counselling</a> appeared first on <a href="http://www.aronbarbosa.com">Aron Barbosa</a>.</p>]]></content:encoded>
			<wfw:commentRss>http://www.aronbarbosa.com/portfolio/oasis-counselling/feed/</wfw:commentRss>
		<slash:comments>0</slash:comments>
		</item>
		<item>
		<title>The Art of Negotiating</title>
		<link>http://www.aronbarbosa.com/portfolio/the-art-of-negotiating/</link>
		<comments>http://www.aronbarbosa.com/portfolio/the-art-of-negotiating/#comments</comments>
		<pubDate>Wed, 13 Mar 2013 00:07:09 +0000</pubDate>
		<dc:creator>lucentx</dc:creator>
				<category><![CDATA[CSS]]></category>
		<category><![CDATA[HTML]]></category>
		<category><![CDATA[jQuery]]></category>
		<category><![CDATA[php]]></category>
		<category><![CDATA[wordpress]]></category>

		<guid isPermaLink="false">http://aronbarbosa.com/?post_type=portfolio&#038;p=100</guid>
		<description><![CDATA[<p></p><p>The post <a href="http://www.aronbarbosa.com/portfolio/the-art-of-negotiating/">The Art of Negotiating</a> appeared first on <a href="http://www.aronbarbosa.com">Aron Barbosa</a>.</p>]]></description>
				<content:encoded><![CDATA[<p>The post <a href="http://www.aronbarbosa.com/portfolio/the-art-of-negotiating/">The Art of Negotiating</a> appeared first on <a href="http://www.aronbarbosa.com">Aron Barbosa</a>.</p>]]></content:encoded>
			<wfw:commentRss>http://www.aronbarbosa.com/portfolio/the-art-of-negotiating/feed/</wfw:commentRss>
		<slash:comments>0</slash:comments>
		</item>
		<item>
		<title>Best Tax Havens</title>
		<link>http://www.aronbarbosa.com/portfolio/best-tax-havens/</link>
		<comments>http://www.aronbarbosa.com/portfolio/best-tax-havens/#comments</comments>
		<pubDate>Wed, 13 Mar 2013 00:05:35 +0000</pubDate>
		<dc:creator>lucentx</dc:creator>
		
		<guid isPermaLink="false">http://aronbarbosa.com/?post_type=portfolio&#038;p=97</guid>
		<description><![CDATA[<p></p><p>The post <a href="http://www.aronbarbosa.com/portfolio/best-tax-havens/">Best Tax Havens</a> appeared first on <a href="http://www.aronbarbosa.com">Aron Barbosa</a>.</p>]]></description>
				<content:encoded><![CDATA[<p>The post <a href="http://www.aronbarbosa.com/portfolio/best-tax-havens/">Best Tax Havens</a> appeared first on <a href="http://www.aronbarbosa.com">Aron Barbosa</a>.</p>]]></content:encoded>
			<wfw:commentRss>http://www.aronbarbosa.com/portfolio/best-tax-havens/feed/</wfw:commentRss>
		<slash:comments>0</slash:comments>
		</item>
	</channel>
</rss>
