#!/usr/bin/env python
import metakit
import os
from datetime import datetime
import sys
# Default location of the PyDS weblog database.  expanduser() is used
# rather than os.environ['HOME'] so that importing this module does not
# raise KeyError in environments where HOME is not set.
default_db = os.path.join(os.path.expanduser('~'),
                          '.PyDS', 'var', 'weblog.data')
def read_entries(db_filename, base_url, limit=0, **kw):
    """Read the posts of a PyDS weblog database as rendered feed entries.

    db_filename -- path of the Metakit storage file; its ``posts`` view
        is read.
    base_url -- prefix used to build a link for posts that carry no
        ``sourceurl`` of their own.
    limit -- maximum number of entries to return.  Zero (the default)
        or a negative value means no limit.  Converted with int(), so a
        string from the command line is accepted.
    **kw -- ignored; lets callers pass their whole option dictionary.

    Returns a list with one make_entry() string per post, in the order
    the view yields them.
    """
    limit = int(limit)
    db = metakit.storage(db_filename, 1)
    entries = []
    for post in db.view('posts'):
        # Test the cap before converting the post so that exactly
        # `limit` entries are produced (counting down and breaking on
        # zero returned one entry too few, and none for limit=1).
        if limit > 0 and len(entries) >= limit:
            break
        issued = datetime.fromtimestamp(post.pubtime)
        url = post.sourceurl
        if not url:
            # No explicit source URL: derive one from the publication
            # date (YYYY-MM-DD -> YYYY/MM/DD) and the post id.
            url = (base_url
                   + post.pubdate.replace('-', '/')
                   + '#'
                   + post.id)
        if post.structured:
            content_type = 'text/x-restructured-text'
        else:
            content_type = 'text/html'
        entries.append(make_entry(
            id=make_id(url, issued),
            title=post.title,
            url=url,
            issued=issued,
            # The post's publication time is used for both dates.
            modified_date=issued,
            content=post.text,
            content_type=content_type))
    return entries
def make_id(link, date):
    """Build a ``tag:`` identifier from a link and a date.

    link -- URL of the resource.  A leading ``http://`` or ``https://``
        is dropped and every '#' is turned into '/'.
    date -- either a string, used verbatim, or an object with a
        strftime() method, formatted as YYYY-MM-DD.

    Returns ``'tag:<domain>,<date>:<path>'``, where <domain> is the part
    of the link before its first '/' and <path> is everything after it.
    A link without any path (e.g. ``http://example.com``) yields an
    empty <path> instead of raising ValueError.
    """
    for scheme in ('http://', 'https://'):
        if link.startswith(scheme):
            link = link[len(scheme):]
            break
    link = link.replace('#', '/')
    if isinstance(date, str):
        date_text = date
    else:
        date_text = date.strftime('%Y-%m-%d')
    if '/' in link:
        domain, path = link.split('/', 1)
    else:
        # Bare host name: there is nothing after the domain.
        domain, path = link, ''
    return 'tag:%s,%s:%s' % (domain, date_text, path)
def make_entry(id, title, url, issued, modified_date, content,
               content_type, parent_id=None, author_name=None,
               author_email=None, author_url=None):
    """Render one post as an Atom 0.3 ``<entry>`` element.

    id -- unique identifier of the entry (see make_id()).
    title -- entry title; the element is omitted when empty.
    url -- link to the post; the element is omitted when empty.
    issued, modified_date -- datetime objects or preformatted strings,
        passed through enc_date().
    content -- body text; it is escaped with enc().
    content_type -- MIME type recorded on the content element.
    parent_id -- optional identifier of the entry this one replies to.
    author_name, author_email, author_url -- optional author details;
        an ``<author>`` element is written when any of them is given.

    Returns the entry as a string ending in a newline.
    """
    parts = ['<entry>\n']
    if title:
        parts.append('<title>%s</title>\n' % enc(title))
    if url:
        parts.append('<link rel="alternate" type="text/html" href="%s" />\n'
                     % enc(url))
    if parent_id:
        # NOTE(review): Atom 0.3 defines no element for a parent entry.
        # The Atom Threading extension element is used here, with its
        # namespace declared inline -- confirm against whatever
        # consumes these feeds.
        parts.append('<thr:in-reply-to'
                     ' xmlns:thr="http://purl.org/syndication/thread/1.0"'
                     ' ref="%s" />\n' % enc(parent_id))
    if author_name or author_email or author_url:
        parts.append('<author>\n')
        if author_name:
            parts.append('<name>%s</name>\n' % enc(author_name))
        if author_email:
            parts.append('<email>%s</email>\n' % enc(author_email))
        if author_url:
            parts.append('<url>%s</url>\n' % enc(author_url))
        parts.append('</author>\n')
    parts.append(entry_template % {
        'id': enc(id),
        'issued': enc_date(issued),
        'modified_date': enc_date(modified_date),
        'content': enc(content),
        'content_type': enc(content_type),
    })
    return ''.join(parts)


# Tail of every entry: the fields that are always present, followed by
# the closing tag for the <entry> opened in make_entry().  The content
# is declared mode="escaped" because make_entry() passes it through
# enc().
entry_template = '''\
<id>%(id)s</id>
<issued>%(issued)s</issued>
<modified>%(modified_date)s</modified>
<content type="%(content_type)s" mode="escaped">%(content)s</content>
</entry>
'''
def enc(v):
    """Escape *v* for use in XML text or a double-quoted attribute.

    Replaces ``&``, ``<``, ``>`` and ``"`` with the corresponding
    character entities and returns the escaped string.
    """
    # '&' must be handled first; otherwise the ampersands introduced by
    # the other entities would themselves be escaped again.
    for char, ent in [('&', '&amp;'), ('<', '&lt;'), ('>', '&gt;'),
                      ('"', '&quot;')]:
        v = v.replace(char, ent)
    return v
# Offset text appended to every formatted date (for example '-04:00').
# None leaves dates without an offset; the command-line entry point
# overrides it from the ``timezone`` argument.
timezone = None


def enc_date(date):
    """Return *date* as text suitable for a feed date field.

    A string is passed through enc() unchanged otherwise; any other
    object is formatted with strftime() as ``YYYY-MM-DDTHH:MM:SS`` and
    gets the module-level ``timezone`` appended when that is set.
    """
    if not isinstance(date, str):
        stamp = date.strftime('%Y-%m-%dT%H:%M:%S')
        return (stamp + timezone) if timezone else stamp
    return enc(date)
def make_feed(entries, title, url, author_name,
              author_email, tagline, modified=None, **kw):
    """Wrap rendered entries in an Atom 0.3 ``<feed>`` document.

    entries -- list of entry strings as returned by make_entry().
    title, tagline -- site title and tagline; escaped with enc().
    url -- URL of the site; the link element is omitted when empty.
    author_name, author_email -- feed author details; an ``<author>``
        element is written when either of them is given.
    modified -- datetime or preformatted string for the feed's modified
        date; defaults to the current local time.
    **kw -- ignored; lets callers pass their whole option dictionary.

    Returns the complete feed document as a string.
    """
    if modified is None:
        modified = datetime.now()
    parts = [
        # NOTE(review): the encoding of the database text is not visible
        # here; UTF-8 is declared as an assumption -- confirm.
        '<?xml version="1.0" encoding="utf-8"?>\n',
        '<feed version="0.3" xmlns="http://purl.org/atom/ns#">\n',
        '<title>%s</title>\n' % enc(title),
        '<modified>%s</modified>\n' % enc_date(modified),
        '<tagline>%s</tagline>\n' % enc(tagline),
    ]
    if url:
        parts.append('<link rel="alternate" type="text/html" href="%s" />\n'
                     % enc(url))
    if author_name or author_email:
        parts.append('<author>\n')
        if author_name:
            parts.append('<name>%s</name>\n' % enc(author_name))
        if author_email:
            # The address is interpolated here; previously a literal
            # '%s' was appended to the feed instead.
            parts.append('<email>%s</email>\n' % enc(author_email))
        parts.append('</author>\n')
    parts.append('%s\n</feed>\n' % '\n'.join(entries))
    return ''.join(parts)
def url_map(db_filename, base_url, **kw):
    """List the id, link and tag identifier of every post.

    db_filename -- path of the Metakit storage file; its ``posts`` view
        is read.
    base_url -- prefix used to build a link for posts that carry no
        ``sourceurl`` of their own.
    **kw -- ignored; lets callers pass their whole option dictionary.

    Returns a list of ``(post id, url, tag id)`` tuples, one per post,
    where the tag id comes from make_id().
    """
    db = metakit.storage(db_filename, 1)
    rows = []
    for post in db.view('posts'):
        # Fall back to a link derived from the publication date and the
        # post id when the post has no source URL.
        link = post.sourceurl or (
            base_url + post.pubdate.replace('-', '/') + '#' + post.id)
        published = datetime.fromtimestamp(post.pubtime)
        rows.append((post.id, link, make_id(link, published)))
    return rows
# Usage text shown for -h and after an argument error.  It lists every
# key the script reads, including the optional db_filename and limit.
help = """\
Usage:
python pyds_to_atom.py ATTRIBUTES > atom.xml
ATTRIBUTES are key=value arguments. The keys (* means required):
title*:
The title of the *site*.
url*:
The URL of the *site*.
author_name*:
Your name, of course.
author_email*:
Your email address.
tagline*:
Your clever statement of blogging intent.
timezone*:
The timezone you live in, expressed like -04:00. - or + is
required, and the leading zero is required (for atom).
base_url*:
The base URL of the (PyCS-style) site. The database doesn't
hold the link (from what I can find).
db_filename:
The PyDS weblog database to read. Defaults to
.PyDS/var/weblog.data in your home directory.
limit:
Cap on the number of posts to convert. 0 (the default) converts
all of them.
OR
url_map=true:
Create the url_map that pycs_to_atom.py uses (base_url is still
required).
Pass -h to print this message.
"""
def parse_args(default, required, help):
    """Build the option dictionary from ``key=value`` arguments.

    default -- dictionary of default options; it is copied, not
        modified.
    required -- names of the keys that must be present unless the
        ``url_map`` option is set.
    help -- usage text printed for ``-h`` and after a missing-key error.

    Reads sys.argv[1:].  Returns the defaults updated with every
    ``key=value`` argument (values stay strings).  Exits with status 0
    after printing the help for ``-h``, and with status 2 for an
    argument without '=' or for missing required keys.
    """
    data = default.copy()
    args = sys.argv[1:]
    if '-h' in args:
        sys.stdout.write(help + '\n')
        sys.exit()
    for arg in args:
        if '=' not in arg:
            # Diagnostics go to stderr: stdout is normally redirected
            # into the generated feed file.
            sys.stderr.write("Bad argument: %r\n" % arg)
            sys.exit(2)
        name, value = arg.split('=', 1)
        data[name] = value
    if not data.get('url_map'):
        missing = [key for key in required if key not in data]
        if missing:
            for key in missing:
                sys.stderr.write("Key %r required\n" % key)
            sys.stderr.write(help + '\n')
            sys.exit(2)
    return data
if __name__ == '__main__':
    # Command-line entry point: parse the key=value arguments, then
    # print either the url map or the complete feed on stdout.
    required = ['title', 'base_url', 'url', 'author_name',
                'author_email', 'tagline', 'timezone']
    data = parse_args({'db_filename': default_db}, required, help)
    if data.get('timezone'):
        # Module-level setting read by enc_date().
        timezone = data['timezone']
    if data.get('url_map'):
        # One "post-id url tag-id" line per post.
        for short_id, url, tag in url_map(**data):
            print('%s %s %s' % (short_id, url, tag))
    else:
        entries = read_entries(**data)
        print(make_feed(entries, **data))