from pyquery import PyQuery as pq
import os
from queue import Queue
from threading import Thread


class txtparser(Thread):
    """Worker thread that converts queued chapter HTML files to text files.

    Each queue item is the full path of one HTML file. The chapter title and
    body are extracted with PyQuery and written to ``<folder>/<title>.txt``,
    where ``<folder>`` is the name of the directory containing the HTML file
    (with any 《》 marks stripped), created under the current working directory.
    """

    def __init__(self, queue):
        """Store the shared queue of HTML file paths to consume."""
        Thread.__init__(self)
        self.queue = queue

    def run(self):
        """Consume paths from the queue forever, converting one file per item."""
        while True:
            content = self.queue.get()
            try:
                self._process(content)
            except Exception as exc:
                # Keep the worker alive: one unreadable or unparsable file
                # must not stop the remaining items from being processed.
                print("Failed to process {}: {}".format(content, exc))
            finally:
                # Every get() needs a matching task_done(), otherwise the
                # queue.join() in launchtxtparser never returns.
                self.queue.task_done()

    @staticmethod
    def _read_html(path):
        """Return the text of *path*, trying UTF-8 first, then the locale default."""
        try:
            with open(path, "r", encoding='utf-8') as reader:
                return reader.read()
        except UnicodeDecodeError:
            with open(path, "r") as reader:
                return reader.read()

    def _process(self, content):
        """Extract title and body from the HTML file *content* and save them."""
        html = self._read_html(content)

        doc = pq(html)
        title = doc("#main .content_read .box_con .bookname h1")
        print("title=====", title.text())
        # The output folder is named after the directory holding the HTML file.
        clipname = os.path.basename(os.path.dirname(content))
        passage = doc("#content").text()
        print("Body======", passage.replace("\n", ""))

        clipname = clipname.replace("《", "").replace("》", "")
        # exist_ok avoids a FileExistsError when several workers reach the
        # same book folder at the same time.
        os.makedirs(clipname, exist_ok=True)

        target = os.path.join(clipname, title.text() + ".txt")
        try:
            with open(target, "w", encoding="gbk") as writer:
                writer.write(passage)
                print("Complete the writing of {}".format(target))
        except Exception:
            # Best effort: record the file that could not be written
            # (e.g. text not representable in GBK) and carry on.
            with open("errorecorder.log", "a") as writer:
                writer.write(target + "\r")
        print("Folder name====== ", clipname)


def launchtxtparser(parentdir):
    """Convert every file found one level below *parentdir* using 10 workers.

    Each sub-directory of *parentdir* is treated as one book; every entry in
    it is queued as an HTML file path. The call returns once the workers have
    handled all queued items.
    """
    rootdir = parentdir
    queue = Queue()
    print(rootdir)
    for entry in os.listdir(rootdir):
        print(entry)
        bookdir = os.path.join(rootdir, entry)
        if not os.path.isdir(bookdir):
            continue
        print(bookdir)
        # Queue every entry; an empty directory simply contributes nothing.
        for filename in os.listdir(bookdir):
            fullfilename = os.path.join(bookdir, filename)
            queue.put(fullfilename)
            print(fullfilename)
        print("ooooophs~processed")

    for _ in range(10):
        worker = txtparser(queue)
        # Daemon threads: the workers loop forever, so they must not keep
        # the interpreter alive after the queue has been drained.
        worker.daemon = True
        worker.start()
    queue.join()


if __name__ == "__main__":
    launchtxtparser("E:\\月关")
from pyquery import PyQuery as pq
import os
from queue import Queue
from threading import Thread


class txtparser(Thread):
    """Worker thread that converts queued chapter HTML files to text files.

    Each queue item is the full path of one HTML file. The chapter title and
    body are extracted with PyQuery and written to ``<folder>/<title>.txt``,
    where ``<folder>`` is the name of the directory containing the HTML file
    (with any 《》 marks stripped), created under the current working directory.
    """

    def __init__(self, queue):
        """Store the shared queue of HTML file paths to consume."""
        Thread.__init__(self)
        self.queue = queue

    def run(self):
        """Consume paths from the queue forever, converting one file per item."""
        while True:
            content = self.queue.get()
            try:
                self._process(content)
            except Exception as exc:
                # Keep the worker alive: one unreadable or unparsable file
                # must not stop the remaining items from being processed.
                print("Failed to process {}: {}".format(content, exc))
            finally:
                # Every get() needs a matching task_done(), otherwise the
                # queue.join() in launchtxtparser never returns.
                self.queue.task_done()

    @staticmethod
    def _read_html(path):
        """Return the text of *path*, trying UTF-8 first, then the locale default."""
        try:
            with open(path, "r", encoding='utf-8') as reader:
                return reader.read()
        except UnicodeDecodeError:
            with open(path, "r") as reader:
                return reader.read()

    def _process(self, content):
        """Extract title and body from the HTML file *content* and save them."""
        html = self._read_html(content)

        doc = pq(html)
        title = doc("#main .content_read .box_con .bookname h1")
        print("title=====", title.text())
        # The output folder is named after the directory holding the HTML file.
        clipname = os.path.basename(os.path.dirname(content))
        passage = doc("#content").text()
        print("Body======", passage.replace("\n", ""))

        clipname = clipname.replace("《", "").replace("》", "")
        # exist_ok avoids a FileExistsError when several workers reach the
        # same book folder at the same time.
        os.makedirs(clipname, exist_ok=True)

        target = os.path.join(clipname, title.text() + ".txt")
        try:
            with open(target, "w", encoding="gbk") as writer:
                writer.write(passage)
                print("Complete the writing of {}".format(target))
        except Exception:
            # Best effort: record the file that could not be written
            # (e.g. text not representable in GBK) and carry on.
            with open("errorecorder.log", "a") as writer:
                writer.write(target + "\r")
        print("Folder name====== ", clipname)


def launchtxtparser(parentdir):
    """Convert every file found one level below *parentdir* using 10 workers.

    Each sub-directory of *parentdir* is treated as one book; every entry in
    it is queued as an HTML file path. The call returns once the workers have
    handled all queued items.
    """
    rootdir = parentdir
    queue = Queue()
    print(rootdir)
    for entry in os.listdir(rootdir):
        print(entry)
        bookdir = os.path.join(rootdir, entry)
        if not os.path.isdir(bookdir):
            continue
        print(bookdir)
        # Queue every entry; an empty directory simply contributes nothing.
        for filename in os.listdir(bookdir):
            fullfilename = os.path.join(bookdir, filename)
            queue.put(fullfilename)
            print(fullfilename)
        print("ooooophs~processed")

    for _ in range(10):
        worker = txtparser(queue)
        # Daemon threads: the workers loop forever, so they must not keep
        # the interpreter alive after the queue has been drained.
        worker.daemon = True
        worker.start()
    queue.join()


if __name__ == "__main__":
    launchtxtparser("E:\\月关")