#!/usr/bin/env python
"""Convert the posts of a PyDS weblog database into an Atom feed.

The posts are read from a Metakit storage file and written to standard
output either as an Atom document or, with ``url_map=true``, as a plain
``short_id url tag_id`` listing.

NOTE(review): the XML element names below follow the Atom 0.3 vocabulary
(``issued``, ``modified``, ``tagline``) that the data keys in this module
already use.  The exact markup was reconstructed -- confirm it against the
consumer of the feed.
"""

import os
import sys
from datetime import datetime

# expanduser() instead of os.environ['HOME'] so that importing this module
# does not raise KeyError where HOME is not set.
default_db = os.path.join(os.path.expanduser('~'),
                          '.PyDS', 'var', 'weblog.data')

# Offset such as '-04:00' appended to every formatted datetime by
# enc_date(); None means no offset is appended.  Set by main().
timezone = None

# Tail of every entry; filled from the ``data`` dict built in make_entry().
entry_template = '''\
<id>%(id)s</id>
<issued>%(issued)s</issued>
<modified>%(modified_date)s</modified>
<content type="%(content_type)s" mode="escaped">%(content)s</content>
</entry>
'''

help = """\
Usage:
  python pyds_to_atom.py ATTRIBUTES > atom.xml

ATTRIBUTES are key=value arguments.  The keys (* means required):

title*:
    The title of the *site*.
url*:
    The URL of the *site*.
author_name*:
    Your name, of course.
author_email*:
    Your email address.
tagline*:
    Your clever statement of blogging intent.
timezone*:
    The timezone you live in, expressed like -04:00.  - or + is
    required, and the leading zero is required (for atom).
base_url*:
    The base URL of the (PyCS-style) site.  The database doesn't hold
    the link (from what I can find).

OR

url_map=true:
    Create the url_map that pycs_to_atom.py uses (base_url is still
    required).
"""


def _open_storage(db_filename):
    """Open the Metakit storage file *db_filename*.

    The import is local so that the formatting helpers in this module can
    be imported without the third-party Metakit binding installed.  The
    mode argument ``1`` is kept unchanged from the original call.
    """
    import metakit
    return metakit.storage(db_filename, 1)


def _post_url(post, base_url):
    """Return the public URL of *post*.

    Uses ``post.sourceurl`` when it is set; otherwise builds the URL from
    *base_url*, the post's ``pubdate`` with ``-`` turned into ``/``, and
    the post id as the fragment.
    """
    if post.sourceurl:
        return post.sourceurl
    return base_url + post.pubdate.replace('-', '/') + '#' + post.id


def read_entries(db_filename, base_url, limit=0, **kw):
    """Read posts from the database and return them as Atom entry strings.

    Args:
        db_filename: Path of the Metakit storage file.
        base_url: Prefix used to build a URL for posts without a
            ``sourceurl``.
        limit: Maximum number of entries to return (anything ``int()``
            accepts).  Zero or a negative value means no limit.
        **kw: Ignored; lets the caller pass the whole argument dict.

    Returns:
        A list of ``<entry>`` strings in database order.
    """
    limit = int(limit)
    db = _open_storage(db_filename)
    entries = []
    for post in db.view('posts'):
        # Check before appending so that exactly ``limit`` entries are
        # produced (the previous countdown stopped one entry early).
        if limit > 0 and len(entries) >= limit:
            break
        issued = datetime.fromtimestamp(post.pubtime)
        url = _post_url(post, base_url)
        if post.structured:
            content_type = 'text/x-restructured-text'
        else:
            content_type = 'text/html'
        entries.append(make_entry(
            id=make_id(url, issued),
            title=post.title,
            url=url,
            issued=issued,
            modified_date=issued,
            content=post.text,
            content_type=content_type))
    return entries


def make_id(link, date):
    """Build a ``tag:`` identifier from a URL and a date.

    Args:
        link: URL of the resource; a leading ``http://`` or ``https://``
            is dropped and ``#`` is replaced by ``/``.
        date: Either an already formatted string or an object with
            ``strftime`` (formatted as ``YYYY-MM-DD``).

    Returns:
        ``'tag:<domain>,<date>:<path>'``.  The path is empty when *link*
        has nothing after the domain.
    """
    for scheme in ('http://', 'https://'):
        if link.startswith(scheme):
            link = link[len(scheme):]
            break
    link = link.replace('#', '/')
    if isinstance(date, str):
        date_text = date
    else:
        date_text = date.strftime('%Y-%m-%d')
    # partition() tolerates a bare domain, where split('/', 1) could not be
    # unpacked into two names.
    domain, _, path = link.partition('/')
    return 'tag:%s,%s:%s' % (domain, date_text, path)


def make_entry(id, title, url, issued, modified_date, content, content_type,
               parent_id=None, author_name=None, author_email=None,
               author_url=None):
    """Format one feed entry.

    Args:
        id: Unique identifier of the entry (see make_id()).
        title: Entry title; the element is omitted when empty.
        url: Link to the entry; the element is omitted when empty.
        issued: Publication time, a string or a datetime.
        modified_date: Modification time, a string or a datetime.
        content: Entry body; it is entity-escaped here.
        content_type: MIME type written into the content element.
        parent_id: Optional identifier of a parent entry.
        author_name: Optional author name.
        author_email: Optional author e-mail address.
        author_url: Optional author home page.

    Returns:
        The entry as a string ending in a newline.
    """
    data = {
        'id': enc(id),
        'issued': enc_date(issued),
        'modified_date': enc_date(modified_date),
        'content': enc(content),
        'content_type': enc(content_type),
    }
    parts = ['<entry>\n']
    if title:
        parts.append('<title>%s</title>\n' % enc(title))
    if url:
        parts.append('<link rel="alternate" type="text/html" href="%s" />\n'
                     % enc(url))
    if parent_id:
        # NOTE(review): the rel value is an assumption; no caller in this
        # file passes parent_id.
        parts.append('<link rel="parent" href="%s" />\n' % enc(parent_id))
    if author_name or author_email or author_url:
        parts.append('<author>\n')
        if author_name:
            parts.append('<name>%s</name>\n' % enc(author_name))
        if author_email:
            parts.append('<email>%s</email>\n' % enc(author_email))
        if author_url:
            parts.append('<url>%s</url>\n' % enc(author_url))
        parts.append('</author>\n')
    parts.append(entry_template % data)
    return ''.join(parts)


def enc(v):
    """Return *v* with ``&``, ``<``, ``>`` and ``"`` replaced by entities."""
    # '&' must be handled first so that the ampersands introduced by the
    # other replacements are not escaped a second time.
    for char, ent in (('&', '&amp;'), ('<', '&lt;'),
                      ('>', '&gt;'), ('"', '&quot;')):
        v = v.replace(char, ent)
    return v


def enc_date(date):
    """Format *date* for the feed.

    A string is passed through enc() unchanged otherwise.  Any other value
    is formatted with ``strftime`` as ``YYYY-MM-DDTHH:MM:SS`` and the
    module-level ``timezone`` is appended when it is set.
    """
    if isinstance(date, str):
        return enc(date)
    text = date.strftime('%Y-%m-%dT%H:%M:%S')
    if timezone:
        text += timezone
    return text


def make_feed(entries, title, url, author_name, author_email, tagline,
              modified=None, **kw):
    """Wrap already formatted entries in a feed document.

    Args:
        entries: Sequence of entry strings (see make_entry()).
        title: Title of the site.
        url: URL of the site; the link element is omitted when empty.
        author_name: Author name; omitted when empty.
        author_email: Author e-mail address; omitted when empty.
        tagline: Tagline of the site.
        modified: Modification time, a string or a datetime; defaults to
            the current local time.
        **kw: Ignored; lets the caller pass the whole argument dict.

    Returns:
        The complete feed as a string.
    """
    if modified is None:
        modified = datetime.now()
    parts = [
        '<feed version="0.3" xmlns="http://purl.org/atom/ns#">\n',
        '<title>%s</title>\n' % enc(title),
        '<modified>%s</modified>\n' % enc_date(modified),
        '<tagline>%s</tagline>\n' % enc(tagline),
    ]
    if url:
        parts.append('<link rel="alternate" type="text/html" href="%s" />\n'
                     % enc(url))
    if author_name or author_email:
        parts.append('<author>\n')
        if author_name:
            parts.append('<name>%s</name>\n' % enc(author_name))
        if author_email:
            # The e-mail value is now actually substituted; previously a
            # bare format string was appended.
            parts.append('<email>%s</email>\n' % enc(author_email))
        parts.append('</author>\n')
    parts.append('%s\n</feed>\n' % '\n'.join(entries))
    return ''.join(parts)


def url_map(db_filename, base_url, **kw):
    """Return ``(post_id, url, tag_id)`` for every post in the database.

    Args:
        db_filename: Path of the Metakit storage file.
        base_url: Prefix used to build a URL for posts without a
            ``sourceurl``.
        **kw: Ignored; lets the caller pass the whole argument dict.

    Returns:
        A list of 3-tuples in database order.
    """
    db = _open_storage(db_filename)
    maps = []
    for post in db.view('posts'):
        url = _post_url(post, base_url)
        tag_id = make_id(url, datetime.fromtimestamp(post.pubtime))
        maps.append((post.id, url, tag_id))
    return maps


def parse_args(default, required, help, argv=None):
    """Parse ``key=value`` command-line arguments into a dict.

    Args:
        default: Dict of default values; it is copied, not modified.
        required: Keys that must be present unless ``url_map`` is given
            with a non-empty value.
        help: Usage text shown for ``-h`` and after a missing key.
        argv: Arguments to parse; defaults to ``sys.argv[1:]``.

    Returns:
        The defaults updated with the parsed arguments.

    Raises:
        SystemExit: With status 0 after ``-h``, with status 2 on a
            malformed argument or a missing required key.
    """
    if argv is None:
        argv = sys.argv[1:]
    data = default.copy()
    if '-h' in argv:
        print(help)
        sys.exit()
    for arg in argv:
        if '=' not in arg:
            # Diagnostics go to stderr: stdout is normally redirected into
            # the generated feed file.
            sys.stderr.write("Bad argument: %r\n" % arg)
            sys.exit(2)
        name, value = arg.split('=', 1)
        data[name] = value
    if not data.get('url_map'):
        missing = [key for key in required if key not in data]
        for key in missing:
            sys.stderr.write("Key %r required\n" % key)
        if missing:
            sys.stderr.write(help + '\n')
            sys.exit(2)
    return data


def main():
    """Run the converter with the arguments found in ``sys.argv``."""
    global timezone
    required = [
        'title', 'base_url', 'url', 'author_name', 'author_email',
        'tagline', 'timezone',
    ]
    data = parse_args({'db_filename': default_db}, required, help)
    if data.get('timezone'):
        timezone = data['timezone']
    if not data.get('url_map'):
        entries = read_entries(**data)
        print(make_feed(entries, **data))
        return
    # parse_args() skips the required-key check in url_map mode, but the
    # listing still cannot be built without base_url.
    if 'base_url' not in data:
        sys.stderr.write("Key %r required\n" % 'base_url')
        sys.stderr.write(help + '\n')
        sys.exit(2)
    for short_id, url, tag in url_map(**data):
        print('%s %s %s' % (short_id, url, tag))


if __name__ == '__main__':
    main()