This file is indexed.

/usr/lib/python2.7/dist-packages/xapers/sources/arxiv.py is in xapers 0.7.1-1.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
import urllib
from HTMLParser import HTMLParser
from xapers.bibtex import data2bib

# Short human-readable description of this source.
description = "Open access e-print service"

# Base URL of the service.
url = 'http://arxiv.org/'

# Template for an abstract page URL; '%s' is replaced by the identifier.
url_format = 'http://arxiv.org/abs/%s'

# Pattern for abs/pdf/format page URLs; group 1 captures the path component
# that follows the section name (everything up to the next '/').
# NOTE(review): only the http:// scheme is matched and the dots in the host
# name are unescaped (so they match any character) -- confirm whether
# https:// links should be recognized as well before tightening this.
url_regex = 'http://arxiv.org/(?:abs|pdf|format)/([^/]*)'

# http://arxiv.org/help/arxiv_identifier
# Pattern for "arXiv:<4 digits>.<4 or 5 digits>" strings; group 1 captures
# the identifier, and an optional trailing "vN" version suffix is matched
# but left out of the group.
# Raw string: '\.' is a regex escape, not a string escape, and a non-raw
# literal triggers an invalid-escape warning on newer interpreters.
scan_regex = r'arXiv:([0-9]{4}\.[0-9]{4,5})(?:v[0-9]+)?'

# HTMLParser subclass that collects the citation_* <meta> tags found in <head>.
class MyHTMLParser(HTMLParser):
    """Extract bibliographic metadata from the <meta> tags of an HTML page.

    After feed() has been called the following attributes hold the result:

      title  -- content of the last 'citation_title' tag seen, or None
      author -- list of 'citation_author' contents, in document order
      year   -- text before the first '/' in the content of the last
                'citation_date' tag seen, or None
      sid    -- content of the last 'citation_arxiv_id' tag seen, or None

    Only <meta> tags encountered before the closing </head> tag are used.
    """

    def __init__(self):
        HTMLParser.__init__(self)
        # Becomes True once </head> has been seen; later tags are ignored.
        self.lefthead = False
        self.title = None
        self.author = []
        self.year = None
        self.sid = None

    def handle_starttag(self, tag, attrs):
        """Record the content of a recognized citation <meta> tag.

        tag   -- tag name as passed in by HTMLParser
        attrs -- list of (name, value) attribute pairs for the tag
        """
        if self.lefthead or tag != 'meta':
            return

        # Look the attributes up by name instead of relying on their order,
        # so that <meta content="..." name="..."> is handled the same way
        # as <meta name="..." content="...">.
        meta = dict(attrs)
        name = meta.get('name')
        content = meta.get('content')

        # A missing or valueless content attribute carries no data.
        if content is None:
            return

        if name == 'citation_title':
            self.title = content
        elif name == 'citation_author':
            self.author.append(content)
        elif name == 'citation_date':
            # Keep only the leading component of a '/'-separated date.
            self.year = content.split('/')[0]
        elif name == 'citation_arxiv_id':
            self.sid = content

    def handle_endtag(self, tag):
        """Stop collecting metadata once the document head is closed."""
        if tag == 'head':
            self.lefthead = True

def fetch_bibtex(id):
    """Download the abstract page for `id` and return its bibtex entry.

    The page at `url_format % id` is fetched and parsed with MyHTMLParser.
    The title, author list and year it finds are handed to data2bib,
    together with the identifier and the page URL, under the key
    'arxiv:<id>'.

    Returns the value produced by data2bib.  Errors raised while opening
    or reading the URL propagate to the caller.
    """
    # Computed once and reused below; a distinct local name also avoids
    # shadowing the module-level `url`.
    abs_url = url_format % id

    f = urllib.urlopen(abs_url)
    try:
        html = f.read()
    finally:
        # Release the connection even if the read fails.
        f.close()

    parser = MyHTMLParser()
    parser.feed(html)

    data = {
        'arxiv':   id,
        'title':   parser.title,
        'authors': parser.author,
        'year':    parser.year,
        'eprint':  id,
        'url':     abs_url,
        }

    return data2bib(data, 'arxiv:%s' % id)

def fetch_file(id):
    """Download the PDF for `id`.

    Returns a (name, data) tuple where name is '<id>.pdf' and data is the
    raw body read from 'http://arxiv.org/pdf/<id>'.  Errors raised while
    opening or reading the URL propagate to the caller.
    """
    # Distinct local name so the module-level `url` is not shadowed.
    pdf_url = 'http://arxiv.org/pdf/%s' % id

    f = urllib.urlopen(pdf_url)
    try:
        data = f.read()
    finally:
        # Release the connection even if the read fails.
        f.close()

    name = '%s.pdf' % id
    return name, data