summaryrefslogtreecommitdiff
path: root/scripts/iso639tab.py
blob: 23f095e13bab60ae3d003343d2e1394215f0a096 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
#!/usr/bin/env python
#
# Read iso-codes iso_639.xml data file and output a .tab file
# 
# Copyright (C) 2005 Alastair McKinstry <mckinstry@debian.org>
# Released under the GPL.
# $Id: iso639tab.py 929 2007-07-23 10:28:20Z toddy-guest $

from xml.sax import make_parser, SAXException, SAXParseException
from xml.sax.handler import feature_namespaces, ContentHandler
import sys, os, getopt, urllib2

# Accumulates one tab-separated output line per <iso_639_entry> element.
# Filled by printLines.startElement while the document is being parsed.
lines = []


def _to_native_str(value):
    """Return *value* as the interpreter's native ``str`` type.

    Anything that is not already a ``str`` (on Python 2: ``unicode``
    attribute values) is encoded as UTF-8; ``str`` values are returned
    unchanged.
    """
    if isinstance(value, str):
        return value
    return value.encode('UTF-8')


class printLines(ContentHandler):
    """SAX content handler that turns ``iso_639_entry`` elements into rows.

    Every matching element appends one line to the module-level ``lines``
    list, laid out as::

        <2T code> TAB <2B code> TAB <639-1 code or XX> TAB <name> NEWLINE
    """

    def __init__(self):
        # Call the base initialiser explicitly; this form works whether the
        # base class is old-style or new-style.
        ContentHandler.__init__(self)

    def startElement(self, name, attrs):
        """Record one output line for an ``iso_639_entry`` element.

        Elements with any other tag name are ignored.

        Args:
            name: tag name of the element being opened.
            attrs: attribute mapping of the element (must support ``get``).

        Raises:
            RuntimeError: if the entry lacks the ``iso_639_2T_code``,
                ``iso_639_2B_code`` or ``name`` attribute.
        """
        if name != 'iso_639_entry':
            return
        t_code = attrs.get('iso_639_2T_code', None)
        if t_code is None:
            raise RuntimeError("Bad file")
        b_code = attrs.get('iso_639_2B_code', None)
        if b_code is None:
            raise RuntimeError("Bad file")
        # Kept in its own local so the ``name`` parameter (the tag name) is
        # not overwritten by the attribute of the same name.
        language = attrs.get('name', None)
        if language is None:
            raise RuntimeError(" BadFile")
        # Entries without an ISO 639-1 code get the placeholder 'XX'.
        short_code = attrs.get('iso_639_1_code', 'XX')
        fields = [t_code, b_code, short_code, language]
        lines.append('\t'.join(_to_native_str(f) for f in fields) + '\n')

## 
## MAIN
##


# Script body: write the fixed .tab header, parse the XML file named by the
# first command-line argument, then emit the collected entries in sorted order.

# Without this check a missing argument would only show up later as
# "<unknown>: list index out of range".
if len(sys.argv) < 2:
    sys.stderr.write('usage: %s ISO_639_XML_FILE\n' % sys.argv[0])
    sys.exit(1)

ofile = sys.stdout
ofile.write("""
## iso-639.tab
##
## Copyright (C) 2005 Alastair McKinstry   <mckinstry@computer.org> 
## Released under the GNU License; see file COPYING for details
##
## PLEASE NOTE: THIS FILE IS DEPRECATED AND SCHEDULED TO BE REMOVED.
## IT IS FOR BACKWARD-COMPATIBILITY ONLY: PLEASE USE THE ISO-639.XML
## FILE INSTEAD.
##
## This file gives a list of all languages in the ISO-639
## standard, and is used to provide translations (via gettext)
##
## Status: ISO 639-2:1998 + additions and changes until 2003-03-05
## Source: http://lcweb.loc.gov/standards/iso639-2/englangn.html
##
## Columns:
##   iso-639-2 terminology code
##   iso-639-2 bibliography code
##   iso-639-1 code (XX if none exists)
##   Name (English)
##
##
""")
p = make_parser()
try:
    dh = printLines()
    p.setContentHandler(dh)
    p.parse(sys.argv[1])
except SAXParseException as e:
    # Malformed XML: report it as file:line:column: message.
    sys.stderr.write('%s:%s:%s: %s\n' % (e.getSystemId(),
                                         e.getLineNumber(),
                                         e.getColumnNumber(),
                                         e.getMessage()))
    sys.exit(1)
except Exception as e:
    # Top-level boundary: any other failure (unreadable file, an entry
    # missing a required attribute, ...) is reported and ends the run.
    sys.stderr.write('<unknown>: %s\n' % str(e))
    sys.exit(1)

# Each line starts with the 2T code, so a plain sort orders the table by it.
lines.sort()
ofile.writelines(lines)
# Flush instead of close: ofile is the shared sys.stdout, which should stay
# open, and flush() still raises on write errors.
ofile.flush()