1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21 """ Schema definition for a pyblio database. When a database is
22 created, the schema is instantiated from a template. The user can then
23 customize it.
24
25 At the moment, a schema contains a dictionary of known document
26 types. For each document, it is possible to know the mandatory and
27 optional fields that describe the document. These fields are typed.
28
29 """
30
31 from gettext import gettext as _
32
33 from xml.sax.saxutils import escape
34
35 from Pyblio.Attribute import N_to_C, C_to_N, Txo
36 from Pyblio import I18n, Compat
37
39
41
43
44 self.id = None
45 self.names = {}
46 self.txo = {}
47
48 if file:
49 tree = Compat.ElementTree.ElementTree (file = file)
50 self.xmlread (tree.getroot ())
51 return
52
55
56 name = property (_name_get)
57
58
60 self.id = tree.attrib.get('id', None)
61
62 for name in tree.findall ('./name'):
63 lang = name.attrib.get ('lang', '')
64 self.names [lang] = name.text
65
66
def parseattr(attr):
    """Build an attribute description from an <attribute> XML element.

    The element must carry an 'id' and a 'type'; the type name is
    looked up in N_to_C.  A Txo type yields a TxoAttribute, any other
    type a plain Attribute.  The optional 'indexed' and 'max' XML
    attributes and the <name> children are copied onto the result,
    which is finally given the chance to read its own data through
    its xmlread() method.

    Raises SchemaError when the type is missing or unknown.
    """
    props = attr.attrib
    aid = props['id']

    try:
        atype = N_to_C[props['type']]
    except KeyError:
        raise SchemaError('attribute %s has an unknown type' % repr(aid))

    # Only taxonomy-typed attributes get the specialised class.
    if atype is Txo:
        factory = TxoAttribute
    else:
        factory = Attribute

    a = factory(aid)
    a.type = atype
    a.indexed = (props.get('indexed', '0') == '1')

    # Without a 'max' the range set by the constructor is left as is.
    if 'max' in props:
        a.range = (1, int(props['max']))

    # Names are stored per language; '' is used when no lang is given.
    for node in attr.findall('name'):
        a.names[node.attrib.get('lang', '')] = node.text

    a.xmlread(self, attr)
    return a
94
95 for attr in tree.findall ('./attribute'):
96 a = parseattr (attr)
97
98 if self.has_key (a.id):
99 raise SchemaError ('duplicate attribute %s' % repr (a.id))
100
101 for q in attr.findall ('./qualifiers/attribute'):
102 qa = parseattr (q)
103 if a.q.has_key (qa.id):
104 raise SchemaError ('duplicate qualifier %s for attribute %s' % (
105 repr (qa.id), repr (a.id)))
106
107 a.q [qa.id] = qa
108
109 self [a.id] = a
110
111
112
113 for attr in tree.findall ('./txo-group'):
114 g = TxoGroup()
115 g.xmlread(attr)
116
117 self.txo[g.group] = g
118 return
119
120
def xmlwrite(self, fd, embedded=False):
    """Serialize the schema as a <pyblio-schema> XML element.

    fd       -- object with a write() method receiving the output
    embedded -- when false (the default), an XML declaration is
                written before the root element; pass True when the
                schema is written inside another document

    The schema names, the attributes and the taxonomy groups are each
    written in sorted key order.
    """
    # escape() leaves double quotes alone, which is wrong for values
    # placed between double quotes: map them explicitly.
    quot = {'"': '&quot;'}

    if not embedded:
        fd.write('<?xml version="1.0" encoding="utf-8"?>\n\n')

    fd.write('<pyblio-schema')
    if self.id:
        fd.write(' id="%s"' % escape(self.id, quot))
    fd.write('>\n')

    langs = sorted(self.names)

    for k in langs:
        v = self.names[k]
        if k:
            lang = ' lang="%s"' % escape(k, quot)
        else:
            lang = ''

        fd.write(' <name%s>%s</name>\n' % (
            lang, escape(v.encode('utf-8'))))

    # Blank line between the names and the attributes, only when
    # at least one name has been written.
    if langs:
        fd.write('\n')

    for k in sorted(self.keys()):
        self[k].xmlwrite(fd)
        fd.write('\n')

    for k in sorted(self.txo):
        self.txo[k].xmlwrite(fd)

    fd.write('</pyblio-schema>\n')
162
163
165
167
168 self.id = id
169
170 self.type = None
171
172 self.range = (1, None)
173
174 self.names = {}
175
176 self.q = {}
177 return
178
180
181 return 'Attribute (%s, %s, %s)' % (
182 repr (self.id), repr (self.type), repr (self.q))
183
184
188
189 name = property (_name_get)
190
def _xmlopen(self, fd, offset, **extra):
    """Write the opening <attribute> tag, the names and the qualifiers.

    The closing </attribute> tag is NOT written here; the caller is
    expected to emit it.

    fd     -- object with a write() method receiving the output
    offset -- number of spaces put in front of every line
    extra  -- additional XML attributes for the <attribute> tag,
              written as name="value" pairs sorted by name
    """
    # escape() leaves double quotes alone, which is wrong for values
    # placed between double quotes: map them explicitly.
    quot = {'"': '&quot;'}
    ws = ' ' * offset

    if self.indexed:
        idx = ' indexed="1"'
    else:
        idx = ''

    # An upper bound of None means "no maximum": no max attribute.
    if self.range[1] is None:
        card = ''
    else:
        card = ' max="%d"' % self.range[1]

    # Sorted so that the output does not depend on dictionary order.
    more = ''.join([' %s="%s"' % (key, escape('%s' % (value,), quot))
                    for key, value in sorted(extra.items())])

    fd.write('%s<attribute id="%s" type="%s"%s%s%s>\n' % (
        ws, escape('%s' % (self.id,), quot), C_to_N[self.type],
        card, idx, more))

    for k in sorted(self.names):
        v = escape(self.names[k].encode('utf-8'))
        if k:
            lang = ' lang="%s"' % escape(k, quot)
        else:
            lang = ''
        fd.write('%s <name%s>%s</name>\n' % (ws, lang, v))

    if self.q:
        fd.write('\n')
        fd.write('%s <qualifiers>\n' % ws)
        for k in sorted(self.q):
            self.q[k].xmlwrite(fd, offset=offset + 2)
        fd.write('%s </qualifiers>\n' % ws)
224
225
229
231
232 ws = ' ' * offset
233
234 self._xmlopen(fd, offset)
235
236 fd.write ('%s</attribute>\n' % ws)
237 return
238
239
241
243
244 return 'TxoAttribute (%s, %s, %s, %s)' % (
245 repr (self.id), repr (self.type), repr (self.group),
246 repr (self.q))
247
249
250 self.group = attr.attrib ['group']
251
252 g = TxoGroup()
253 g.group = self.group
254
255 schema.txo.setdefault(self.group, g)
256 return
257
259
260 ws = ' ' * offset
261
262 self._xmlopen(fd, offset, group=self.group)
263
264 fd.write ('%s</attribute>\n' % ws)
265 return
266
267
269
270 """ Definition of a taxonomy item.
271
272 This item can then be reused as the argument for L{Attribute.Txo}
273 creation. A taxonomy item can be seen as a value in an enumeration
274 of possible values. Compared to a I{simple} enumeration, it has
275 the additional property of being hierarchical. For instance, you
276 could define a taxonomy of document types::
277
278 - publication
279 - article
280 - peer-reviewed
281 - not peer-reviewed
282 - conference paper
283 - unpublished
284 - report
285
286 ...and use this taxonomy to fill an attribute of your records. If
287 you use L{Pyblio.Query} to search for the item I{article}, you
288 will retrieve all the records which contain one of I{article},
289 I{peer-reviewed} or I{not peer-reviewed}.
290 """
291
293
294 self.id = None
295 self.group = None
296 self.parent = None
297
298 self.names = {}
299 return
300
304
305 name = property (_name_get)
306
307
309
310 keys = self.names.keys ()
311 keys.sort ()
312
313 for k in keys:
314 v = self.names [k]
315 if k:
316 lang = ' lang="%s"' % k
317 else:
318 lang = ''
319
320 fd.write (' %s<name%s>%s</name>\n' % (
321 space, lang, escape (v.encode ('utf-8'))))
322
323 return
324
326
327 return 'TxoItem(%s, %s)' % (repr(self.group), repr(self.id))
328
329
331
333 dict.__init__(self)
334
335 self.group = None
336
337
338 self._byname = {}
339 return
340
342 return 'TxoGroup (%s)' % (
343 repr (self.group))
344
346 return self._byname[name]
347
349
350 self.group = attr.attrib['id']
351
def nesting(tree, parent):
    """Register every <txo-item> found directly under ``tree``.

    Each item becomes a TxoItem whose parent is ``parent`` and whose
    group is the group of the enclosing object; it is stored under
    its integer id.  Items that have a name for the 'C' language are
    also indexed by that name.  The function then recurses into the
    item so that nested items get it as their parent.

    Raises SchemaError when a 'C' name is used by two items.
    """
    for node in tree.findall('./txo-item'):
        entry = TxoItem()

        entry.id = int(node.attrib['id'])
        entry.parent = parent
        entry.group = self.group

        # Names are stored per language; '' when no lang is given.
        for label in node.findall('./name'):
            entry.names[label.attrib.get('lang', '')] = label.text

        try:
            cname = entry.names['C']
        except KeyError:
            # No 'C' name: the item is only reachable through its id.
            pass
        else:
            if cname in self._byname:
                raise SchemaError('name %r appears more than once' % cname)

            self._byname[cname] = entry

        self[entry.id] = entry

        nesting(node, entry.id)
374
375 nesting(attr, None)
376 return
377
378
380 """ Create the reversed taxonomy tree """
381
382 children = { None: [] }
383
384 for k in self.keys ():
385 children [k] = []
386
387 for v in self.values ():
388 children [v.parent].append (v.id)
389
390 return children
391
393 """ Return a txo and all its children """
394
395 children = self._reverse ()
396
397 full = []
398 for c in children [k]:
399 full = full + self.expand (c)
400
401 full.append (k)
402
403 return full
404
405
407
408 ws = ' ' * offset
409
410 if not self.keys(): return
411
412 fd.write ('%s<txo-group id="%s">\n' % (ws, self.group))
413
414 children = self._reverse()
415
def subwrite(node, depth=0):
    """Write the <txo-item> element of ``node``, children included.

    ``depth`` is added to the enclosing offset to compute the
    indentation; every level of nesting increases it by one.
    """
    item = self[node]
    pad = ' ' * (offset + depth)

    fd.write(' %s<txo-item id="%d">\n' % (pad, item.id))

    # The item writes its own content (its names) inside the tag.
    item.xmlwrite(fd, pad)

    for sub in children[node]:
        subwrite(sub, depth + 1)

    fd.write(' %s</txo-item>\n' % pad)
431
432 for n in children [None]:
433 subwrite (n)
434
435 fd.write ('%s</txo-group>\n\n' % ws)
436 return
437