# coding: utf-8

# Listing page that is scraped for the newest entries; it is also used as
# the channel link of the generated feed.
INDEX_URL = "http://www.nanet.go.kr/dl-web/hp/k04_abook_list.html?nav=040401&flag="

from BeautifulSoup import BeautifulSoup
import urllib
import re
# Python 2 idiom: reloading ``sys`` makes ``sys.setdefaultencoding``
# available again so that it can be called further down.
import sys; reload(sys)

import datetime
import PyRSS2Gen

# Process-wide switch of the implicit str/unicode conversion codec to UTF-8.
# This must run after the ``reload(sys)`` above.
# NOTE(review): presumably needed because the scraped pages and the feed
# title contain non-ASCII (Korean) text -- confirm before removing.
sys.setdefaultencoding('utf-8')

# Download and parse the listing page.
index_page = urllib.urlopen(INDEX_URL)
soup = BeautifulSoup(index_page)

# Everything up to and including the last '/' of the URL that was actually
# fetched; the relative hrefs found in the pages are appended to this prefix.
base_url = '/'.join(index_page.url.split('/')[:-1]) + '/'

# Only anchors whose href contains 'p_file' are turned into feed items.
detail_href_pattern = re.compile('p_file')

rss_items = []
# Limit the feed to the first five matching anchors on the listing page.
for anchor in soup.findAll('a', {'href': detail_href_pattern})[:5]:
    # The anchor text is split on '/' into three integers that are used as
    # year, month and day of the publication date.
    year, month, day = [int(part) for part in anchor.contents[0].split('/')]
    item_title = anchor.renderContents()
    detail_url = base_url + anchor['href'].encode()

    # The detail page carries its content in an iframe: follow the first
    # iframe's src and use the <body> of that document as the description.
    detail_soup = BeautifulSoup(urllib.urlopen(detail_url))
    first_frame = detail_soup.findAll("iframe")[0]
    content_url = base_url + first_frame["src"].encode()
    content_soup = BeautifulSoup(urllib.urlopen(content_url))
    body_html = content_soup.body.renderContents()

    # The item links to the iframe document, while the guid is the anchor's
    # original (relative) href from the listing page.
    rss_items.append(PyRSS2Gen.RSSItem(
        author="webw3@nanet.go.kr",
        title=item_title,
        link=content_url,
        description=body_html,
        guid=PyRSS2Gen.Guid(anchor.get('href')),
        pubDate=datetime.datetime(year, month, day)))

# Assemble the channel from the items collected above.
# The channel title is a fixed string rather than the listing page's <title>.
# NOTE(review): the description is read from a hard-coded position in the
# page's table layout (the 30th <td>, then the second <td> nested in it);
# an IndexError here most likely means the site's markup has changed.
rss = PyRSS2Gen.RSS2(
        title = "국회도서관 단행본/비도서 신착안내",
        link = INDEX_URL,
        description = soup("td")[29].findAll("td")[1].renderContents(),
        lastBuildDate = datetime.datetime.now(),
        items = rss_items)

# Close the output file explicitly so the XML is flushed to disk even if
# serialisation fails part-way, instead of relying on garbage collection.
feed_file = open("new-separate-volume.xml", "w")
try:
    rss.write_xml(feed_file, "UTF-8")
finally:
    feed_file.close()
