- 182 名前:デフォルトの名無しさん mailto:sage [2007/04/02(月) 19:56:25 ]
- >>181
# Convert the log file "112115.txt" into the CSV file "112115.csv".
#
# Each input line is split into fields, the URL path (field index 4) is
# broken into its "/"-separated segments, and every segment is
# percent-decoded and appended as an extra column.  Output is UTF-8.
#
# This is Python 2 code (it relies on ``unicode`` and ``urllib.unquote``).
import re
import urllib

# Field delimiters, tried in this order at each position:
#   ` - - [`   `] "`   `" "`   `" `   ` "`   a lone `"`   a lone space
# NOTE(review): this looks tailored to Apache-style access-log lines
# (host - - [date tz] "METHOD path PROTO" status size "referer" "agent");
# confirm against the real input file.
rexp1 = re.compile(r"[ ][-][ ][-][ ][[]|[]][ ]\"|\"[ ]\"|\"[ ]|[ ]\"|\"|[ ]")


def _decode_segment(segment):
    """Percent-decode one URL path segment and return it as ``unicode``.

    The unquoted bytes are decoded as GBK first; if that fails they are
    decoded as UTF-8.

    Raises:
        UnicodeDecodeError: if the bytes are valid in neither encoding.
    """
    # NOTE(review): GBK accepts many byte sequences that are really UTF-8,
    # so UTF-8 input may be silently mis-decoded here.  The original order
    # is kept on purpose; confirm which encoding dominates in the data.
    raw = urllib.unquote(segment)
    try:
        return unicode(raw, "gbk")
    except UnicodeDecodeError:
        return unicode(raw, "utf-8")


def convert(linein):
    """Turn one log line into one UTF-8 encoded CSV line.

    Args:
        linein: a single raw line (byte string) read from the log file,
            with or without its trailing line terminator.

    Returns:
        A UTF-8 encoded byte string ending in "\\n".  It holds the original
        fields minus the URL path, followed by the decoded path segments,
        all joined with ",".

    Raises:
        IndexError: if the line yields fewer than five fields.
        UnicodeDecodeError: if a path segment is neither GBK nor UTF-8, or
            if a non-path field contains non-ASCII bytes (Python 2 decodes
            those implicitly as ASCII when joining with a unicode string).
    """
    # Only the first 9 delimiters are replaced, so everything after the
    # ninth one stays together as the final field.
    separated = rexp1.sub(",", linein, 9)
    # Drop the line terminator BEFORE stripping the closing quote.  Lines
    # read from a file still end in "\n", so stripping '"' first would do
    # nothing and the quote would leak into the last CSV field.
    fields = separated.rstrip("\r\n").rstrip("\"").split(",")
    # Field 4 is the URL path; replace it with its decoded segments, which
    # are appended after the remaining fields.
    segments = fields.pop(4).split("/")
    fields.extend(_decode_segment(part) for part in segments)
    out = u",".join([field.rstrip() for field in fields]) + u"\n"
    return out.encode("utf-8")


# Close both files explicitly instead of relying on garbage collection, so
# the output is flushed even on interpreters without reference counting.
# try/finally is used rather than ``with`` to stay compatible with old
# Python 2 releases.
source = open("112115.txt", "r")
try:
    target = open("112115.csv", "w")
    try:
        converted = (convert(x) for x in source)
        target.writelines(converted)
    finally:
        target.close()
finally:
    source.close()
|

|