# -*- coding: utf-8 -*-
import urllib
import sgmllib
class ParseOldURL(sgmllib.SGMLParser):
    """Fetch a page and extract the replacement link it advertises.

    The constructor downloads ``url_l`` and feeds the response body to the
    SGML parser.  While parsing, the first ``<a href=...>`` met inside a
    ``<div>`` whose ``id`` contains ``"oldURL"`` is recorded in ``newURL``.
    When no such link is found, ``get_newURL`` falls back to the address
    reported by the response's ``geturl()``.
    """

    def __init__(self, url_l, print_s=0, verbose=0):
        """Download ``url_l`` and parse it immediately.

        url_l   -- address of the page to fetch.
        print_s -- when equal to 1, print the raw response body before parsing.
        verbose -- forwarded unchanged to ``sgmllib.SGMLParser.__init__``.
        """
        sgmllib.SGMLParser.__init__(self, verbose)
        self.newURL = ''       # href captured inside the "oldURL" div
        self.redirectURL = ''  # value of geturl() for the fetched page
        self.oldURL = 0        # 0 outside the target div, else <div> nesting depth
        self.trouve = 0        # becomes 1 once a link has been captured
        opener = urllib.FancyURLopener({})
        urlopener = opener.open(url_l)
        try:
            s = urlopener.read()
            self.redirectURL = urlopener.geturl()
        finally:
            # Release the connection even when reading fails.
            urlopener.close()
        if print_s == 1:
            print(s)
        self.parse(s)

    def parse(self, s):
        """Feed the string ``s`` to the parser and flush it."""
        self.feed(s)
        self.close()

    def start_a(self, attributes):
        """Record the ``href`` of the first anchor found inside the target div."""
        if self.trouve != 0 or self.oldURL <= 0:
            return
        for name, value in attributes:
            if name == "href":
                self.newURL = self.newURL + value
                self.trouve = 1

    def start_div(self, attributes):
        """Enter the target div, or count one more nested div inside it.

        NOTE(review): the source layout did not show which ``if`` the
        ``else`` belongs to; it is read here as depth tracking paired with
        ``end_div`` -- confirm against the original file.
        """
        if self.trouve != 0:
            return
        if self.oldURL == 0:
            for name, value in attributes:
                if name == "id" and value.find("oldURL") != -1:
                    self.oldURL = 1
        else:
            self.oldURL = self.oldURL + 1

    def end_div(self):
        """Leave one level of div nesting while inside the target div."""
        if self.trouve == 0 and self.oldURL > 0:
            self.oldURL = self.oldURL - 1

    def get_newURL(self):
        """Return the captured link, or ``redirectURL`` when none was found."""
        if self.trouve == 0:
            return self.redirectURL
        return self.newURL

    def get_param(self, param):
        """Return the value following ``param=`` in the captured link.

        The value runs up to the next ``&`` or to the end of the link.  An
        empty string is returned when no link was captured or when the
        parameter does not occur, so callers always receive a string.

        NOTE(review): the search is a plain substring search, so ``idTexte``
        also matches inside ``cidTexte=``; kept as-is because callers may
        rely on it.
        """
        if self.trouve == 0:
            return ''
        marqueur = param + '='
        position = self.newURL.find(marqueur)
        if position == -1:
            return ''
        debut = position + len(marqueur)
        fin = self.newURL.find('&', debut)
        if fin == -1:
            fin = len(self.newURL)
        return self.newURL[debut:fin]

    def get_idTexte(self):
        """Return the ``idTexte`` parameter of the captured link."""
        return self.get_param('idTexte')

    def get_dateTexte(self):
        """Return the ``dateTexte`` parameter of the captured link."""
        return self.get_param('dateTexte')
class ModeleLegifrance:
    """Parse one call of the Legifrance wiki template.

    The constructor takes wikitext, isolates the first ``{{ ... }}`` span and
    splits it on ``|`` into at most four fields: template name, base, number
    and text.  A leading ``name=`` is removed from the last three.

    Attributes set by the constructor:
      Modele          -- u"Légifrance" when the first field contains that name
                         (either capitalisation), otherwise "".
      Base            -- second field (alias of a Legifrance base or code).
      Numero          -- third field.
      Texte           -- fourth field; keeps any further ``|`` it contains.
      oldURL          -- "" until ``doOldURL`` is called.
      listParametres  -- the raw fields as split.

    ``Code`` maps every known alias to its Legifrance identifier and
    ``iCode`` maps the same alias to its rank in the table.  Both are built
    once from ``_BASES`` and shared by all instances.
    """

    # (alias, Legifrance identifier).  ORDER MATTERS: the rank of an entry is
    # its ``iCode`` value, and ``get_texte`` / ``doOldURL`` compare that rank
    # against the group boundaries below.
    #   0-7  : document bases      8-10 : consolidated texts
    #   11   : JORF                12+  : codes
    _BASES = (
        (u"CASS", "CASS"),
        (u"INCA", "INCA"),
        (u"JADE", "JADE"),
        (u"CONSTIT", "CONSTIT"),
        (u"LEGI", "LEGI"),
        (u"LEX", "LEX"),
        (u"LEX_SIMPLE_AV90", "LEX_SIMPLE_AV90"),
        (u"avant90", "LEX_SIMPLE_AV90"),
        (u"consolidé", "texteconsolide"),
        (u"texteconsolide", "texteconsolide"),
        (u"consolide", "texteconsolide"),
        (u"JORF", "JORF"),
        (u"CC", "CCIVILL0"),
        (u"CCIVILL0", "CCIVILL0"),
        (u"CCOM", "CCOMMERL"),
        (u"COM", "CCOMMERL"),
        (u"CCOM(R)", "CCOMMERM"),
        (u"COM(R)", "CCOMMERM"),
        (u"CGCT", "CGCTERRL"),
        (u"CGCT(R)", "CGCTERRM"),
        (u"CEDU", "CEDUCATL"),
        (u"CEDU(R)", "CEDUCATM"),
        (u"CELE", "CELECTOL"),
        (u"CELE(R)", "CELECTOM"),
        (u"CESEDA(L)", "CENTGERL"),
        (u"CESEDA(R)", "CENTGERM"),
        (u"CE", "CENVIROL"),
        (u"CE(R)", "CENVIROM"),
        (u"CJA", "CJUSADML"),
        (u"CJA(R)", "CJUSADMR"),
        (u"CJF(L)", "CJURFINL"),
        (u"CJF(R)", "CJURFINR"),
        (u"COJ(L)", "CORGJUDL"),
        (u"COJ(R)", "CORGJUDR"),
        (u"COJ", "CORGJUNL"),
        (u"CPAT", "CPATRIML"),
        (u"CP", "CPENALLL"),
        (u"CP(R)", "CPENALLR"),
        (u"CPC", "CPROCIA0"),
        (u"NCPC", "CPROCIV0"),
        (u"CPROCIV0", "CPROCIV0"),
        (u"CPP", "CPROCPEL"),
        (u"CPP(R)", "CPROCPER"),
        (u"CPP(D)", "CPROCPED"),
        (u"CPP(A)", "CPROCPEA"),
        (u"CGPPP", "CGPROPPL"),
        (u"CPI", "CPROINTL"),
        (u"CESEDA", "CENTGERL"),
        (u"CRO", "CROUTENL"),
        (u"CRO(R)", "CROUTENM"),
        (u"CR", "CRURALNL"),
        (u"CR(R)", "CRURALNM"),
        (u"CSP", "CSANPUNL"),
        (u"CSP(NR)", "CSANPUNR"),
        (u"CSP(L)", "CSANPUBL"),
        (u"CSP(R)", "CSANPUBR"),
        (u"CSS(L)", "CSECSOCL"),
        (u"CSS(D)", "CSECSOCD"),
        (u"CSS(R)", "CSECSOCR"),
        (u"CT(NL)", "CTRAVANL"),
        (u"CT", "CTRAVAIL"),
        (u"CT(R)", "CTRAVAIR"),
        (u"CT(D)", "CTRAVAID"),
        (u"CONSO", "CCONSOML"),
        (u"CONSO(R)", "CCONSOMR"),
        (u"CONSO(D)", "CCONSOMD"),
        (u"URBA(L)", "CURBANIL"),
        (u"URBA(R)", "CURBANIR"),
        (u"CGI", "CGIMPO00"),
        (u"CGLIVP", "CGLIVPFL"),
        (u"CGLIVPFM", "CGLIVPFM"),
        (u"CGLIVPFA", "CGLIVPFA"),
        (u"ASS", "CASSURAL"),
        (u"ASS(R)", "CASSURAM"),
        (u"ASS(A)", "CASSURAA"),
        (u"CDEF", "CDAFENSL"),
        (u"CDEF(R)", "CDAFENSM"),
    )

    # alias -> Legifrance identifier
    Code = dict(_BASES)
    # alias -> rank in _BASES (0-based)
    iCode = dict((alias, rang) for rang, (alias, _cible) in enumerate(_BASES))

    def __init__(self, ml):
        """Parse the first template call found in the wikitext ``ml``.

        When ``ml`` has no ``{{`` the text is parsed from its start, and when
        it has no ``}}`` it is parsed up to its end.
        """
        self.Modele = ""
        self.Base = ""
        self.Numero = ""
        self.Texte = ""
        self.oldURL = ""
        debut = ml.find("{{")
        if debut != -1:
            ml = ml[debut:]
        fin = ml.find("}}")
        if fin != -1:
            ml = ml[:fin]
        # At most three splits: everything after the third "|" stays in the
        # text field, pipes included.
        self.listParametres = ml.split("|", 3)
        champs = self.listParametres
        entete = champs[0]
        if entete.find(u"Légifrance") != -1 or entete.find(u"légifrance") != -1:
            self.Modele = u"Légifrance"
        if len(champs) > 1:
            self.Base = self._valeur(champs[1])
        if len(champs) > 2:
            self.Numero = self._valeur(champs[2])
        if len(champs) > 3:
            self.Texte = self._valeur(champs[3])

    def _valeur(self, champ):
        """Return ``champ`` without its leading ``name=`` part, if any."""
        # find() gives -1 when there is no "=", so the slice starts at 0 and
        # the field is returned whole.
        return champ[champ.find("=") + 1:]

    def Debug(self):
        """Print the parsed fields, with the base shown in resolved form."""
        print("Modele = " + self.Modele)
        print("Base = " + self.get_newBase())
        print("Numero = " + self.Numero)
        print("Texte = " + self.Texte)

    def get_newBase(self):
        """Return the Legifrance identifier for ``Base``, or ``Base`` itself
        when it is not a known alias."""
        if self.Base in self.Code:
            return self.Code[self.Base]
        return self.Base

    def get_texte(self):
        """Return the text to use for the template's text parameter.

        An explicit ``Texte`` wins.  Otherwise the number is used for codes
        (rank 12 and above) and "" for every other base, unknown ones
        included since their rank is -1.
        """
        if self.Texte != "":
            return self.Texte
        if self.get_iCode() < 12:
            return ""
        return self.Numero

    def get_iCode(self):
        """Return the rank of ``Base`` in the table, or -1 when unknown."""
        if self.Base in self.iCode:
            return self.iCode[self.Base]
        return -1

    def doOldURL(self):
        """Build the old-style Legifrance URL, store it in ``oldURL`` and
        return it.

        NOTE(review): the source layout did not show whether the number is
        appended for unknown bases too; here an unknown base yields the bare
        site root -- confirm against the original file.
        """
        url = "http://www.legifrance.gouv.fr/"
        rang = self.get_iCode()
        if rang >= 0:
            if rang < 8:
                url = url + "WAspad/UnDocument?base=" + self.get_newBase() + "&nod="
            elif rang < 11:
                url = url + "texteconsolide/"
            elif rang == 11:
                url = url + "WAspad/UnTexteDeJorf?numjo="
            else:
                url = url + "WAspad/UnArticleDeCode?code=" + self.get_newBase() + ".rcv&art="
            url = url + self.Numero
        self.oldURL = url
        return self.oldURL
# -*- coding: utf-8 -*-
import wikipedia
import ModeleLegifrance
def save_texte(nom_fichier, texte):
    """Write ``texte`` to the file ``nom_fichier``, encoded as UTF-8.

    The file is opened in "wt" mode, so any existing content is replaced.
    The handle is closed even when encoding or writing raises.
    """
    f = open(nom_fichier, "wt")
    try:
        f.write(texte.encode("utf-8"))
    finally:
        f.close()
def traite_modele(modele, old, test=0):
    """Rewrite every ``{{<modele>...}}`` call of ``old`` with named parameters.

    Each call is parsed with ``ModeleLegifrance.ModeleLegifrance`` and emitted
    again with explicit ``base=``, ``numéro=`` and ``texte=`` parameters.
    Text outside the calls is copied unchanged, and so is a call that has no
    closing ``}}``.

    modele -- template name searched for right after ``{{``.
    old    -- wikitext to process.
    test   -- when equal to 1, print the base rank of each rewritten call.

    Returns the rewritten wikitext.
    """
    ouverture = "{{" + modele
    morceaux = []
    reste = old
    while True:
        debut = reste.find(ouverture)
        if debut == -1:
            # No further call: keep the tail as it is.
            morceaux.append(reste)
            break
        morceaux.append(reste[:debut])
        reste = reste[debut:]
        fin = reste.find("}}")
        if fin == -1:
            # Unterminated call: leave it untouched.
            morceaux.append(reste)
            break
        a = ModeleLegifrance.ModeleLegifrance(reste)
        morceaux.append("{{" + a.Modele + "|base=" + a.Base + u"|numéro=" + a.Numero + "|texte=" + a.get_texte() + "}}")
        reste = reste[fin + 2:]
        if test == 1:
            print(a.get_iCode())
    return "".join(morceaux)
def main(test=0):
    """Add parameter names to the Legifrance template on a list of pages.

    Page titles are read one per line from ``test_Jbot_ML.lst``.  A page is
    processed only when it exists, is not a redirect and allows bot edits.
    Its text goes through ``traite_modele`` for the lowercase and then the
    capitalised template name, and is saved when the result differs.

    test -- when equal to 1, nothing is saved to the wiki; the original text,
            the intermediate text and the final text (the last one only when
            it differs) are written to ``<title>.old``, ``<title>.new1`` and
            ``<title>.new`` instead.
    """
    f = open("test_Jbot_ML.lst", "rt")
    try:
        listeArticles = f.readlines()
    finally:
        f.close()
    site = wikipedia.getSite()
    for ligne in listeArticles:
        # Strip only the line terminator: slicing off the last character
        # would truncate a final line that has no trailing newline.
        nomDePageURL = ligne.rstrip("\r\n")
        if not nomDePageURL:
            # Blank line: no title to look up.
            continue
        pageL = wikipedia.Page(site, nomDePageURL)
        if not pageL.exists() or pageL.isRedirectPage() or not pageL.botMayEdit():
            continue
        print(nomDePageURL)
        old = pageL.get()
        if test == 1:
            save_texte(nomDePageURL + ".old", old)
        new = traite_modele(u"légifrance", old)
        if test == 1:
            save_texte(nomDePageURL + ".new1", new)
        if len(new) == 0:
            new = traite_modele(u"Légifrance", old)
        else:
            new = traite_modele(u"Légifrance", new)
        if new != old:
            if test == 1:
                save_texte(nomDePageURL + ".new", new)
            else:
                pageL.put(new, u"Ajout des noms des paramètres du [[Modèle:Légifrance]]")
# Script entry point: run the bot only when this file is executed directly.
if __name__ == "__main__":
    try:
        main()
    finally:
        # Runs whether main() returns normally or raises.
        # NOTE(review): presumably lets pywikipedia release its resources --
        # confirm against the wikipedia module.
        wikipedia.stopme()