/usr/lib/python2.7/dist-packages/xapers/sources/arxiv.py is in xapers 0.7.1-1.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
import urllib
from HTMLParser import HTMLParser
from xapers.bibtex import data2bib
# Human-readable summary of this source.
description = "Open access e-print service"

# Site root and the template used to build an abstract-page URL from an
# identifier.
url = 'http://arxiv.org/'
url_format = 'http://arxiv.org/abs/%s'

# Captures the path component that follows /abs/, /pdf/ or /format/.
# NOTE(review): the dots in the host name are unescaped and therefore match
# any character; left as-is because callers are not visible from here.
url_regex = 'http://arxiv.org/(?:abs|pdf|format)/([^/]*)'

# http://arxiv.org/help/arxiv_identifier
# Captures a "NNNN.NNNN" / "NNNN.NNNNN" identifier following "arXiv:"; an
# optional trailing version suffix ("v2") is matched but not captured.
# Written as a raw string so that "\." is not treated as a string escape.
scan_regex = r'arXiv:([0-9]{4}\.[0-9]{4,5})(?:v[0-9]+)?'
# html parser override to override handler methods
class MyHTMLParser(HTMLParser):
    """Collect citation metadata from the ``<meta>`` tags of an HTML head.

    After ``feed()`` has been called, the recognised values are available
    as attributes:

    * ``title``  -- content of the ``citation_title`` tag, or ``None``
    * ``author`` -- list with the content of every ``citation_author`` tag
    * ``year``   -- text before the first ``/`` of ``citation_date``
    * ``sid``    -- content of the ``citation_arxiv_id`` tag, or ``None``

    ``<meta>`` tags that appear after the closing ``</head>`` are ignored.
    """

    def __init__(self):
        # Direct base-class call: on Python 2 HTMLParser is an old-style
        # class, so super() cannot be used here.
        HTMLParser.__init__(self)
        self.lefthead = False  # set once </head> has been seen
        self.title = None
        self.author = []
        self.year = None
        self.sid = None

    def handle_starttag(self, tag, attrs):
        """Record the content of a recognised ``citation_*`` meta tag.

        ``attrs`` is the list of ``(name, value)`` pairs supplied by
        HTMLParser.  The attributes are looked up by name, so the result
        does not depend on whether ``name`` or ``content`` is written
        first inside the tag.
        """
        if self.lefthead or tag != 'meta':
            return

        fields = dict(attrs)
        content = fields.get('content')
        if content is None:
            # No content attribute, or one without a value: nothing to store.
            return

        name = fields.get('name')
        if name == 'citation_title':
            self.title = content
        elif name == 'citation_author':
            self.author.append(content)
        elif name == 'citation_date':
            # Keep only the part before the first slash.
            self.year = content.split('/')[0]
        elif name == 'citation_arxiv_id':
            self.sid = content

    def handle_endtag(self, tag):
        """Stop collecting metadata once the document head is closed."""
        if tag == 'head':
            self.lefthead = True
def fetch_bibtex(id):
    """Build a bibliographic record for *id* and return it via ``data2bib``.

    The page at ``url_format % id`` is downloaded and parsed with
    ``MyHTMLParser``; the title, author list and year it found are combined
    with the identifier and page URL into a dict, which is passed to
    ``data2bib`` together with the key ``'arxiv:<id>'``.  The return value
    is whatever ``data2bib`` returns.
    """
    url = url_format % id

    f = urllib.urlopen(url)
    try:
        html = f.read()
    finally:
        # Release the connection even if the read fails.
        f.close()

    parser = MyHTMLParser()
    parser.feed(html)

    data = {
        'arxiv': id,
        'title': parser.title,
        'authors': parser.author,
        'year': parser.year,
        'eprint': id,
        'url': url,
    }
    return data2bib(data, 'arxiv:%s' % id)
def fetch_file(id):
    """Download the PDF for *id*.

    Returns a ``(name, data)`` tuple where ``name`` is ``'<id>.pdf'`` and
    ``data`` is the raw response body read from
    ``http://arxiv.org/pdf/<id>``.
    """
    url = 'http://arxiv.org/pdf/%s' % id

    f = urllib.urlopen(url)
    try:
        data = f.read()
    finally:
        # Release the connection even if the read fails.
        f.close()

    name = '%s.pdf' % id
    return name, data
|