python_download_offline_html

先上代码。本代码参考了下面这篇文章来实现:

https://blog.csdn.net/gorquanwu/article/details/81739589

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
# author: leek
# date:2021-6-23
from urllib import request
from bs4 import BeautifulSoup as bs
import time
import os
import re

'''
用来爬取网站网页 gitbook 页面 离线下载到本地
实现功能:url深度抓取,保存每个页面的css、html、js等文件
'''


# Collect the child-page links listed in the <nav> element of one page.
def get_urls(url, baseurl, urls):
    """Append the pages linked from the ``<nav>`` of ``url`` to ``urls``.

    Args:
        url: Page to fetch; the response body is decoded as UTF-8.
        baseurl: Prefix that is joined to every collected ``href`` with "/".
        urls: List that is extended in place; a URL already present in it is
            not appended a second time.

    Anchors without an ``href`` attribute are skipped, as are the placeholder
    targets ``#``, ``#carousel-example-generic`` and anything containing
    ``javascript:void(0)``.
    """
    with request.urlopen(url) as f:
        data = f.read().decode('utf-8')
    link = bs(data).find("nav").find_all('a')
    for i in link:
        suffix = i.get('href')
        # An <a> tag without href yields None, which cannot be searched
        # with ``in``; skip it instead of aborting the whole crawl.
        if suffix is None:
            continue
        # Placeholder links that must not be recorded as child pages.
        if suffix == '#' or suffix == '#carousel-example-generic' or 'javascript:void(0)' in suffix:
            continue
        childurl = baseurl + "/" + suffix
        if childurl not in urls:
            urls.append(childurl)

# Download one page and store it below ``path``, mirroring the URL's path.
def get_source(url, path, delay=1):
    """Fetch ``url`` and write its HTML to a file underneath ``path``.

    The target file name is ``path`` followed by every "/"-separated part of
    ``url`` after the third slash (i.e. everything after ``scheme://host``).
    Missing parent directories are created.

    Args:
        url: Address of the page to download.
        path: Directory prefix under which the page is stored.
        delay: Seconds to pause after a successful save (default 1, the
            pause the script always used).

    Any failure while downloading or writing is reported on stdout together
    with its cause; nothing is raised to the caller.
    """
    try:
        with request.urlopen(url) as f:
            html_source = f.read().decode()

        # Rebuild the URL path (parts after "scheme://host") below ``path``.
        htmlFile = path
        for part in url.split('/')[3:]:
            htmlFile += "/" + part

        # Create the parent directory of the target file.
        os.makedirs(os.path.dirname(os.path.abspath(htmlFile)), exist_ok=True)
        with open(htmlFile, 'w', encoding='UTF-8') as f:
            f.write(html_source)
        print(htmlFile + "文件保存成功")
        if delay:
            time.sleep(delay)
    except Exception as err:
        # Best effort per page: report the cause and let the caller continue.
        print(url + "保存html文件时报错", err)

# Download the JS and CSS files referenced by the site's landing page.
def save_css_js(path, base_url="http://sdk.g-bim.cn", delay=1):
    """Save every script and stylesheet referenced by ``base_url``.

    The landing page is fetched once; ``<script src="...">`` and
    ``<link rel="stylesheet" href="...">`` targets are extracted with regular
    expressions, downloaded from ``base_url + "/" + href`` and written as
    UTF-8 text to ``path + href``.

    Args:
        path: Prefix that is concatenated directly with each href to form the
            local file name.
        base_url: Site whose landing page is scanned (defaults to the site
            the script was written for).
        delay: Seconds to pause after each successful download (default 1).
    """
    with request.urlopen(base_url) as total_html:
        html_source = total_html.read().decode()
    jsHref = re.compile(r'<script src="(.*?)"', re.S).findall(html_source)
    cssHref = re.compile(r'<link rel="stylesheet" href="(.*?)"', re.S).findall(html_source)

    _save_text_assets(base_url, jsHref, path, "js", delay)
    _save_text_assets(base_url, cssHref, path, "css", delay)


# Download each href relative to base_url and write it as text under path.
def _save_text_assets(base_url, hrefs, path, kind, delay):
    for href in hrefs:
        name = href.split('/')[-1]
        try:
            with request.urlopen(base_url + "/" + href) as resp:
                source = resp.read().decode()
            filename = path + href
            # Create the parent directory of the target file.
            os.makedirs(os.path.dirname(os.path.abspath(filename)), exist_ok=True)
            with open(filename, 'w', encoding='UTF-8') as f:
                f.write(source)
            print(name + " " + kind + "文件保存成功")
            if delay:
                time.sleep(delay)
        except Exception as err:
            # Best effort per asset: report the cause and keep going.
            print("该" + name + " " + kind + "文件无法下载", err)


# Download image files into the directory ``path``.
def save_img(href, path, base_url="http://sdk.g-bim.cn", delay=1):
    """Download every image in ``href`` and store it inside ``path``.

    Each image is fetched from ``base_url + href_item`` and saved under its
    last URL path component. The file name is built with ``os.path.join`` so
    it is valid on every platform, and ``path`` is created when missing.

    Args:
        href: Iterable of image paths as they appear in the page.
        path: Directory that receives the image files.
        base_url: Prefix put in front of every image path (defaults to the
            site the script was written for).
        delay: Seconds to pause after each successful download (default 1).

    A failed download is reported on stdout with its cause and the remaining
    images are still processed.
    """
    for img_href in href:
        name = img_href.split('/')[-1]
        url = base_url + img_href
        filename = os.path.join(path, name)
        try:
            with request.urlopen(url) as w:
                img_source = w.read()
            if path:
                os.makedirs(path, exist_ok=True)
            with open(filename, 'wb') as f:
                f.write(img_source)
            print(name + " 图像文件保存成功")
            if delay:
                time.sleep(delay)
        except Exception as err:
            # Best effort per image: report the cause and keep going.
            print("该" + name + " 图像无法下载", err)


if __name__ == '__main__':
    # Page whose <nav> links seed the download list.
    url = 'http://sdk.g-bim.cn'
    # Prefix put in front of every relative link found on that page.
    baseurl = 'http://sdk.g-bim.cn'
    # Absolute backup location; declared here but not used by the calls below.
    basedir = r'C:\Users\Administrator\Desktop\HTML_bak'
    # Filled in place by get_urls.
    urls = []
    get_urls(url, baseurl, urls)
    # save_css_js(r'../html_bak/')
    # Store every collected page under ../html_bak.
    for u in urls:
        get_source(u, r'../html_bak')

geomesa-input-error

导入失败

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
ERROR java.lang.NoSuchMethodError: org.locationtech.jts.geom.Polygon.getExteriorRing()Lorg/locationtech/jts/geom/LineString;
java.util.concurrent.ExecutionException: java.lang.NoSuchMethodError: org.locationtech.jts.geom.Polygon.getExteriorRing()Lorg/locationtech/jts/geom/LineString;
at java.util.concurrent.FutureTask.report(FutureTask.java:122)
at java.util.concurrent.FutureTask.get(FutureTask.java:192)
at org.locationtech.geomesa.tools.ingest.LocalConverterIngest$$anonfun$runIngest$1.apply(LocalConverterIngest.scala:183)
at org.locationtech.geomesa.tools.ingest.LocalConverterIngest$$anonfun$runIngest$1.apply(LocalConverterIngest.scala:183)
at scala.collection.immutable.List.foreach(List.scala:381)
at org.locationtech.geomesa.tools.ingest.LocalConverterIngest.runIngest(LocalConverterIngest.scala:183)
at org.locationtech.geomesa.tools.ingest.AbstractConverterIngest.run(AbstractConverterIngest.scala:41)
at org.locationtech.geomesa.tools.ingest.IngestCommand$$anonfun$execute$2.apply(IngestCommand.scala:106)
at org.locationtech.geomesa.tools.ingest.IngestCommand$$anonfun$execute$2.apply(IngestCommand.scala:105)
at scala.Option.foreach(Option.scala:257)
at org.locationtech.geomesa.tools.ingest.IngestCommand$class.execute(IngestCommand.scala:105)
at org.locationtech.geomesa.hbase.tools.HBaseRunner$$anon$2.execute(HBaseRunner.scala:32)
at org.locationtech.geomesa.tools.Runner$class.main(Runner.scala:30)
at org.locationtech.geomesa.hbase.tools.HBaseRunner$.main(HBaseRunner.scala:17)
at org.locationtech.geomesa.hbase.tools.HBaseRunner.main(HBaseRunner.scala)
Caused by: java.lang.NoSuchMethodError: org.locationtech.jts.geom.Polygon.getExteriorRing()Lorg/locationtech/jts/geom/LineString;
at org.locationtech.geomesa.features.serialization.WkbSerialization$class.writePolygon(WkbSerialization.scala:121)
at org.locationtech.geomesa.features.serialization.WkbSerialization$class.serializeWkb(WkbSerialization.scala:45)
at org.locationtech.geomesa.features.kryo.serialization.KryoGeometrySerialization$.serializeWkb(KryoGeometrySerialization.scala:14)
at org.locationtech.geomesa.features.serialization.WkbSerialization$class.writeGeometryCollection(WkbSerialization.scala:176)
at org.locationtech.geomesa.features.serialization.WkbSerialization$class.serializeWkb(WkbSerialization.scala:48)
at org.locationtech.geomesa.features.kryo.serialization.KryoGeometrySerialization$.serializeWkb(KryoGeometrySerialization.scala:14)
at org.locationtech.geomesa.features.kryo.impl.KryoFeatureSerialization$KryoGeometryWkbWriter$.apply(KryoFeatureSerialization.scala:228)
at org.locationtech.geomesa.features.kryo.impl.KryoFeatureSerialization$class.writeFeature(KryoFeatureSerialization.scala:70)
at org.locationtech.geomesa.features.kryo.impl.KryoFeatureSerialization$class.serialize(KryoFeatureSerialization.scala:42)
at org.locationtech.geomesa.features.kryo.KryoFeatureSerializer$MutableActiveSerializer.serialize(KryoFeatureSerializer.scala:75)
at org.locationtech.geomesa.index.api.WritableFeature$FeatureLevelWritableFeature$$anonfun$values$1$$anonfun$apply$1.apply(WritableFeature.scala:154)
at org.locationtech.geomesa.index.api.WritableFeature$FeatureLevelWritableFeature$$anonfun$values$1$$anonfun$apply$1.apply(WritableFeature.scala:154)
at org.locationtech.geomesa.index.api.package$KeyValue.value$lzycompute(package.scala:184)
at org.locationtech.geomesa.index.api.package$KeyValue.value(package.scala:184)
at org.locationtech.geomesa.hbase.data.HBaseIndexAdapter$HBaseIndexWriter$$anonfun$write$1.apply(HBaseIndexAdapter.scala:614)
at org.locationtech.geomesa.hbase.data.HBaseIndexAdapter$HBaseIndexWriter$$anonfun$write$1.apply(HBaseIndexAdapter.scala:612)
at scala.collection.Iterator$class.foreach(Iterator.scala:742)
at scala.collection.AbstractIterator.foreach(Iterator.scala:1194)
at scala.collection.IterableLike$class.foreach(IterableLike.scala:72)
at scala.collection.AbstractIterable.foreach(Iterable.scala:54)
at org.locationtech.geomesa.hbase.data.HBaseIndexAdapter$HBaseIndexWriter.write(HBaseIndexAdapter.scala:612)
at org.locationtech.geomesa.index.api.IndexAdapter$BaseIndexWriter.write(IndexAdapter.scala:149)
at org.locationtech.geomesa.index.geotools.GeoMesaFeatureWriter$class.writeFeature(GeoMesaFeatureWriter.scala:52)
at org.locationtech.geomesa.index.geotools.GeoMesaFeatureWriter$TableFeatureWriter.writeFeature(GeoMesaFeatureWriter.scala:141)
at org.locationtech.geomesa.index.geotools.GeoMesaFeatureWriter$GeoMesaAppendFeatureWriter$class.write(GeoMesaFeatureWriter.scala:227)
at org.locationtech.geomesa.index.geotools.GeoMesaFeatureWriter$$anon$3.write(GeoMesaFeatureWriter.scala:108)
at org.locationtech.geomesa.utils.geotools.FeatureUtils$.write(FeatureUtils.scala:141)
at org.locationtech.geomesa.tools.ingest.LocalConverterIngest$LocalIngestWorker$1$$anonfun$run$1$$anonfun$apply$1$$anonfun$apply$2$$anonfun$apply$4$$anonfun$apply$5$$anonfun$apply$6.apply(LocalConverterIngest.scala:117)
at org.locationtech.geomesa.tools.ingest.LocalConverterIngest$LocalIngestWorker$1$$anonfun$run$1$$anonfun$apply$1$$anonfun$apply$2$$anonfun$apply$4$$anonfun$apply$5$$anonfun$apply$6.apply(LocalConverterIngest.scala:115)
at scala.collection.Iterator$class.foreach(Iterator.scala:742)
at org.locationtech.geomesa.utils.collection.CloseableIterator$FlatMapCloseableIterator.foreach(CloseableIterator.scala:133)
at org.locationtech.geomesa.tools.ingest.LocalConverterIngest$LocalIngestWorker$1$$anonfun$run$1$$anonfun$apply$1$$anonfun$apply$2$$anonfun$apply$4$$anonfun$apply$5.apply(LocalConverterIngest.scala:115)
at org.locationtech.geomesa.tools.ingest.LocalConverterIngest$LocalIngestWorker$1$$anonfun$run$1$$anonfun$apply$1$$anonfun$apply$2$$anonfun$apply$4$$anonfun$apply$5.apply(LocalConverterIngest.scala:109)
at org.locationtech.geomesa.utils.io.CloseablePool$CommonsPoolPool.borrow(CloseablePool.scala:68)
at org.locationtech.geomesa.tools.ingest.LocalConverterIngest$LocalIngestWorker$1$$anonfun$run$1$$anonfun$apply$1$$anonfun$apply$2$$anonfun$apply$4.apply(LocalConverterIngest.scala:109)
at org.locationtech.geomesa.tools.ingest.LocalConverterIngest$LocalIngestWorker$1$$anonfun$run$1$$anonfun$apply$1$$anonfun$apply$2$$anonfun$apply$4.apply(LocalConverterIngest.scala:108)
at org.locationtech.geomesa.utils.io.package$WithClose$.apply(package.scala:64)
at org.locationtech.geomesa.tools.ingest.LocalConverterIngest$LocalIngestWorker$1$$anonfun$run$1$$anonfun$apply$1$$anonfun$apply$2.apply(LocalConverterIngest.scala:108)
at org.locationtech.geomesa.tools.ingest.LocalConverterIngest$LocalIngestWorker$1$$anonfun$run$1$$anonfun$apply$1$$anonfun$apply$2.apply(LocalConverterIngest.scala:106)
at scala.collection.Iterator$class.foreach(Iterator.scala:742)
at org.locationtech.geomesa.utils.collection.CloseableIterator$CloseableSingleIterator.foreach(CloseableIterator.scala:86)
at org.locationtech.geomesa.tools.ingest.LocalConverterIngest$LocalIngestWorker$1$$anonfun$run$1$$anonfun$apply$1.apply(LocalConverterIngest.scala:106)
at org.locationtech.geomesa.tools.ingest.LocalConverterIngest$LocalIngestWorker$1$$anonfun$run$1$$anonfun$apply$1.apply(LocalConverterIngest.scala:105)
at org.locationtech.geomesa.utils.io.package$WithClose$.apply(package.scala:64)
at org.locationtech.geomesa.tools.ingest.LocalConverterIngest$LocalIngestWorker$1$$anonfun$run$1.apply(LocalConverterIngest.scala:105)
at org.locationtech.geomesa.tools.ingest.LocalConverterIngest$LocalIngestWorker$1$$anonfun$run$1.apply(LocalConverterIngest.scala:102)
at org.locationtech.geomesa.utils.io.CloseablePool$CommonsPoolPool.borrow(CloseablePool.scala:68)
at org.locationtech.geomesa.tools.ingest.LocalConverterIngest$LocalIngestWorker$1.run(LocalConverterIngest.scala:102)
at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
at java.util.concurrent.FutureTask.run(FutureTask.java:266)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)

netty_use

使用netty来简单写个demo,熟悉语法和常见的命令

1
2
3
4
5
6
<dependency>
<groupId>io.netty</groupId>
<artifactId>netty-all</artifactId>
<version>4.1.49.Final</version>
</dependency>

hadoop_windows

Unable to find native drivers in HADOOP_HOME. Please, refer to Hadoop Wiki for more details.

link

配置环境变量(包括 Java 和 Hadoop 的)。注意一点:路径中尽量不要有空格,否则在执行
java -version
hadoop version 的时候会报错 java_home incorrect

log-drive一次 java.io.IOException Connection reset by peer 故障排查

转载地址: https://mengkang.net/1118.html

我的弹幕服务,代码换了个环境运行,虽然服务可用,但是发现出现了如下情况,基本1秒输出来一次:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
java.io.IOException: Connection reset by peer
at sun.nio.ch.FileDispatcherImpl.read0(Native Method)
at sun.nio.ch.SocketDispatcher.read(SocketDispatcher.java:39)
at sun.nio.ch.IOUtil.readIntoNativeBuffer(IOUtil.java:223)
at sun.nio.ch.IOUtil.read(IOUtil.java:192)
at sun.nio.ch.SocketChannelImpl.read(SocketChannelImpl.java:380)
at io.netty.buffer.PooledUnsafeDirectByteBuf.setBytes(PooledUnsafeDirectByteBuf.java:311)
at io.netty.buffer.AbstractByteBuf.writeBytes(AbstractByteBuf.java:853)
at io.netty.buffer.WrappedByteBuf.writeBytes(WrappedByteBuf.java:641)
at io.netty.channel.socket.nio.NioSocketChannel.doReadBytes(NioSocketChannel.java:240)
at io.netty.channel.nio.AbstractNioByteChannel$NioByteUnsafe.read(AbstractNioByteChannel.java:115)
at io.netty.channel.nio.NioEventLoop.processSelectedKey(NioEventLoop.java:514)
at io.netty.channel.nio.NioEventLoop.processSelectedKeysOptimized(NioEventLoop.java:471)
at io.netty.channel.nio.NioEventLoop.processSelectedKeys(NioEventLoop.java:385)
at io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:351)
at io.netty.util.concurrent.SingleThreadEventExecutor$2.run(SingleThreadEventExecutor.java:116)
at io.netty.util.internal.chmv8.ForkJoinTask$RunnableExecuteAction.exec(ForkJoinTask.java:1412)
at io.netty.util.internal.chmv8.ForkJoinTask.doExec(ForkJoinTask.java:280)
at io.netty.util.internal.chmv8.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:877)
at io.netty.util.internal.chmv8.ForkJoinPool.scan(ForkJoinPool.java:1706)
at io.netty.util.internal.chmv8.ForkJoinPool.runWorker(ForkJoinPool.java:1661)
at io.netty.util.internal.chmv8.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:126)