-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathindex.html
155 lines (142 loc) · 21.1 KB
/
index.html
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
<!DOCTYPE html><html lang="zh-CN"><head><meta charset="UTF-8"><meta http-equiv="X-UA-Compatible" content="IE=edge"><meta name="viewport" content="width=device-width, initial-scale=1"><meta name="format-detection" content="telephone=no"><meta name="apple-mobile-web-app-capable" content="yes"><meta name="apple-mobile-web-app-status-bar-style" content="black"><link rel="icon" href="/images/icons/favicon-16x16.png?v=2.1.1" type="image/png" sizes="16x16"><link rel="icon" href="/images/icons/favicon-32x32.png?v=2.1.1" type="image/png" sizes="32x32"><meta property="og:type" content="website">
<meta property="og:title" content="GXH HOME">
<meta property="og:url" content="https://gxxxh.github.io/index.html">
<meta property="og:site_name" content="GXH HOME">
<meta property="og:locale" content="zh_CN">
<meta property="article:author" content="GXH">
<meta name="twitter:card" content="summary"><title>GXH HOME</title><link rel="canonical" href="https://gxxxh.github.io/index.html"><link rel="dns-prefetch" href="https://cdn.jsdelivr.net"><link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/@fortawesome/[email protected]/css/all.min.css" type="text/css"><link rel="stylesheet" href="/css/index.css?v=2.1.1"><script>var Stun = window.Stun || {};
// Page-level configuration object literal emitted into this inline script.
// NOTE(review): presumably read by the theme scripts under /js/ that are
// loaded at the end of the page — confirm against those files.
// Entries set to `undefined` or `false` are presumably features that are
// switched off in the site/theme config — TODO confirm.
var CONFIG = {
// presumably the site root path used as a URL prefix — confirm
root: '/',
algolia: undefined,
// CSS class names for icons (the "fas fa-*" values look like Font Awesome classes)
fontIcon: {"prompt":{"success":"fas fa-check-circle","info":"fas fa-arrow-circle-right","warning":"fas fa-exclamation-circle","error":"fas fa-times-circle"},"copyBtn":"fas fa-copy"},
sidebar: {"offsetTop":"20px","tocMaxDepth":6},
header: {"enable":true,"showOnPost":false,"scrollDownIcon":false},
postWidget: {"endText":true},
nightMode: {"enable":true},
back2top: {"enable":true},
codeblock: {"style":"carbon","highlight":"dark","wordWrap":false},
reward: false,
fancybox: false,
zoomImage: {"gapAside":"20px"},
galleryWaterfall: undefined,
lazyload: false,
pjax: undefined,
externalLink: {"icon":{"enable":true,"name":"fas fa-external-link-alt"}},
shortcuts: undefined,
// User-facing strings (Chinese): "copy", "copied successfully", "copy failed"
prompt: {"copyButton":"复制","copySuccess":"复制成功","copyError":"复制失败"},
// Directory names for static assets; they match the /js, /css and /images paths used in this page
sourcePath: {"js":"js","css":"css","images":"images"},
};
window.CONFIG = CONFIG;</script><meta name="generator" content="Hexo 5.2.0"></head><body><div class="container" id="container"><header class="header" id="header"><div class="header-inner"><nav class="header-nav header-nav--fixed"><div class="header-nav-inner"><div class="header-nav-menubtn"><i class="fas fa-bars"></i></div><div class="header-nav-menu"><div class="header-nav-menu-item"><a class="header-nav-menu-item__link" href="/"><span class="header-nav-menu-item__icon"><i class="fas fa-home"></i></span><span class="header-nav-menu-item__text">首页</span></a></div><div class="header-nav-menu-item"><a class="header-nav-menu-item__link" href="/archives/"><span class="header-nav-menu-item__icon"><i class="fas fa-folder-open"></i></span><span class="header-nav-menu-item__text">归档</span></a></div><div class="header-nav-menu-item"><a class="header-nav-menu-item__link" href="/categories/"><span class="header-nav-menu-item__icon"><i class="fas fa-layer-group"></i></span><span class="header-nav-menu-item__text">分类</span></a></div><div class="header-nav-menu-item"><a class="header-nav-menu-item__link" href="/tags/"><span class="header-nav-menu-item__icon"><i class="fas fa-tags"></i></span><span class="header-nav-menu-item__text">标签</span></a></div></div><div class="header-nav-mode"><div class="mode"><div class="mode-track"><span class="mode-track-moon"></span><span class="mode-track-sun"></span></div><div class="mode-thumb"></div></div></div></div></nav><div class="header-banner"><div class="header-banner-info"><div class="header-banner-info__title">GXH HOME</div><div class="header-banner-info__subtitle"></div></div></div></div></header><main class="main" id="main"><div class="main-inner"><div class="content-wrap" id="content-wrap"><div class="content content-home" id="content"><section class="postlist"><article class="postlist-item post"><header class="post-header"><h1 class="post-title"><a class="post-title__link" 
href="/%E6%9D%82%E4%B8%83%E6%9D%82%E5%85%AB%E7%9A%84%E4%B8%9C%E8%A5%BF/Google-Earth-Engine-%E8%B0%83%E7%A0%94%EF%BC%881%EF%BC%89/">Google Earth Engine 调研(1)</a></h1><div class="post-meta"><span class="post-meta-item post-meta-item--createtime"><span class="post-meta-item__icon"><i class="far fa-calendar-plus"></i></span><span class="post-meta-item__info">发表于</span><span class="post-meta-item__value">2020-09-28</span></span><span class="post-meta-item post-meta-item--updatetime"><span class="post-meta-item__icon"><i class="far fa-calendar-check"></i></span><span class="post-meta-item__info">更新于</span><span class="post-meta-item__value">2020-09-28</span></span></div></header><div class="post-body"><div class="post-excerpt"></div></div></article><article class="postlist-item post"><header class="post-header"><h1 class="post-title"><a class="post-title__link" href="/%E5%88%86%E5%B8%83%E5%BC%8F%E8%AE%A1%E7%AE%97%E6%A1%86%E6%9E%B6/Apache-Ray-%E8%B0%83%E7%A0%94%EF%BC%881%EF%BC%89/">Apache Ray 调研(1)</a></h1><div class="post-meta"><span class="post-meta-item post-meta-item--createtime"><span class="post-meta-item__icon"><i class="far fa-calendar-plus"></i></span><span class="post-meta-item__info">发表于</span><span class="post-meta-item__value">2020-09-28</span></span><span class="post-meta-item post-meta-item--updatetime"><span class="post-meta-item__icon"><i class="far fa-calendar-check"></i></span><span class="post-meta-item__info">更新于</span><span class="post-meta-item__value">2020-09-28</span></span></div></header><div class="post-body"><div class="post-excerpt">
<h2 id="提出背景" >
<a href="#提出背景" class="heading-link"><i class="fas fa-link"></i></a>提出背景</h2>
<ol>
<li>摩尔定律终结</li>
<li>专用领域的硬件不能满足需求</li>
<li>深度学习增加了对硬件和算力的需求。</li>
</ol>
<blockquote>
<p><span class="exturl"><a class="exturl__link" target="_blank" rel="noopener" href="https://anyscale.com/blog/the-future-of-computing-is-distributed/" >The Future of Computing is Distributed</a><span class="exturl__icon"><i class="fas fa-external-link-alt"></i></span></span></p>
</blockquote>
<h2 id="特点:" >
<a href="#特点:" class="heading-link"><i class="fas fa-link"></i></a>特点:</h2>
<ol>
<li>轻量级的API接口</li>
<li>高效的数据存储和传输:每个节点通过共享内存维护了一块局部的对象存储,然后利用专门优化过的Apache Arrow格式来进行不同结点的数据交换。</li>
<li>全局状态维护:将全局的控制状态(而非数据)利用Redis分片来维护,使得其他组件可以方便的进行平滑扩展和错误恢复。</li>
<li>去中心化的调度:调度分散在各个节点上;根据GCS拉取全局负载状态信息,然后随机选择一个合乎资源约束的可用结点。</li>
</ol>
<blockquote>
<p><span class="exturl"><a class="exturl__link" target="_blank" rel="noopener" href="https://zhuanlan.zhihu.com/p/64051646" >继Spark之后,UC Berkeley 推出新一代AI计算引擎——Ray</a><span class="exturl__icon"><i class="fas fa-external-link-alt"></i></span></span></p>
</blockquote>
<h2 id="Ray的软件架构" >
<a href="#Ray的软件架构" class="heading-link"><i class="fas fa-link"></i></a>Ray的软件架构</h2>
<p><img src="Apache-Ray-%E8%B0%83%E7%A0%94%EF%BC%881%EF%BC%89_files/1.png" alt="Ray架构"><br>Ray的结构组成包括两部分:</p>
<ol>
<li>系统层:实现任务调度和数据管理,以满足性能和容错需求</li>
<li>应用层:实现了API和计算模型</li>
</ol>
<h3 id="应用层" >
<a href="#应用层" class="heading-link"><i class="fas fa-link"></i></a>应用层</h3>
<p>应用层包含三种类型的进程:</p>
<ol>
<li>Driver: 用于执行用户程序的进程</li>
<li>Worker: 工作进程,在结点启动时被自动启动,用于执行Driver或其他Worker指派的任务的无状态进程。</li>
<li>Actor: 有状态的进程,用于顺序执行被启动时指派的任务。</li>
</ol>
<h3 id="系统层" >
<a href="#系统层" class="heading-link"><i class="fas fa-link"></i></a>系统层</h3>
<p>系统层包含三个主要组件:</p>
<h4 id="GCS-global-control-store" >
<a href="#GCS-global-control-store" class="heading-link"><i class="fas fa-link"></i></a>GCS(global control store)</h4>
<p>关键词:高容错,低延迟<br> GCS维护着系统全局的控制状态信息。GCS极大简化了Ray的整体设计,不仅使得对<strong>容错</strong>的支持简化了很多,也使得分布式的对象存储和调度器可以进行扩展。</p>
<h4 id="Bottom-up-distributed-scheduler" >
<a href="#Bottom-up-distributed-scheduler" class="heading-link"><i class="fas fa-link"></i></a>Bottom-up distributed scheduler</h4>
<p>为了支持每秒数百万次的调度,已有调度器,基于中心的调度器(具有很好的局部性,但是会有数十秒的延迟),分布式调度器(高并发,但是不考虑数据的局部性)。</p>
<p><img src="Apache-Ray-%E8%B0%83%E7%A0%94%EF%BC%881%EF%BC%89_files/2.png" alt="Bottom-up distributed scheduler"></p>
<p>Ray设计了两层的调度架构,包括全局调度器(global scheduler) 和每个节点上的本地调度器(local scheduler)。本地调度器只根据本节点的局部负载信息进行调度,而全局调度器会根据全局负载来分配任务。对一个结点上创建的任务,总是尝试先在本地执行,除非本地机器过载或不能满足资源需求时,才会将其转发给全局调度器。<br>全局调度:1. 找出满足资源需求的结点,2. 选取排队时间最短的结点。</p>
<h4 id="In-Memory-Distributed-Object-Store" >
<a href="#In-Memory-Distributed-Object-Store" class="heading-link"><i class="fas fa-link"></i></a>In-Memory Distributed Object Store</h4>
<p> 基于内存的分布式对象存储,为了降低任务的延迟,实现了一个基于内存的分布式存储系统以存储每个任务的输入和输出。在每个节点上,通过共享内存的方式对对象进行存储,使得同一节点上不同任务以零拷贝的代价进行数据共享。</p>
<ol>
<li>需要将数据都放在内存中,只有内存不够时才用LRU算法挤出内存。对象存储只存储不可变数据,这样能避免复杂的一致性协议需求,并且能简化数据的容错支持。对象存储不支持分布式对象,每个对象必须能够放在单节点内存中,并且只存在于单节点中。对于大矩阵和树状结构等大对象,可以通过应用层来进行拆分。</li>
</ol>
<h3 id="Ray的远程调用流程" >
<a href="#Ray的远程调用流程" class="heading-link"><i class="fas fa-link"></i></a>Ray的远程调用流程</h3>
<p><img src="Apache-Ray-%E8%B0%83%E7%A0%94%EF%BC%881%EF%BC%89_files/3.png" alt="Executing a task remotely"></p>
<ol start="0">
<li>远程函数add()在初始化(ray.init)时,会被自动注册到GCS中,进而分发到每个工作进程中。</li>
<li>当一个Driver进程调用add.remote(a,b),并且在a,b分别存在结点N1和N2上时。Driver首先将任务交给本地调度器。</li>
<li>本地调度器将任务请求转到全局调度器。</li>
<li>全局调度器在GCS中查找add(a,b)请求中参数a,b的位置</li>
<li>全局调度器决定将任务调度到N2上执行。</li>
<li>N2的本地调度器收到请求后,会检查本地对象存储中是否存在所有输入参数,</li>
<li>由于本地存储对象中没有对象a, 工作进程会在GCS中查找a的位置。</li>
<li>对象a存在N1中,因此将其同步到N2的本地存储中</li>
<li>N2的本地调度器执行add()操作</li>
</ol>
<p><img src="Apache-Ray-%E8%B0%83%E7%A0%94%EF%BC%881%EF%BC%89_files/4.png" alt="Returning the result of a remote task"><br>0. 该图展示了N1上执行ray.get()和N2上执行add()触发的操作。</p>
<ol>
<li>N1的Driver进程在本地对象存储中查看该id对应的对象c是否存在。</li>
<li>由于此时本地存储中没有c,Driver会向GCS中查找c的位置,此时c未被创建,N1 的对象存储向 GCS 中的对象表(Object Table)注册了一个回调函数,以监听 c 对象被创建事件</li>
<li>N2执行完add()后,将结果c存到其本地对象存储中</li>
<li>将c的信息添加到GCS的对象存储表中</li>
<li>GCS检测到c的创建,会触发之前N1注册的回调函数</li>
<li>N1的对象存储将c从N2同步过去,从而结束该任务。</li>
</ol>
<blockquote>
<p><span class="exturl"><a class="exturl__link" target="_blank" rel="noopener" href="https://arxiv.org/abs/1712.05889" >Ray: A Distributed Framework for Emerging AI Applications</a><span class="exturl__icon"><i class="fas fa-external-link-alt"></i></span></span></p>
</blockquote>
<h2 id="未整理" >
<a href="#未整理" class="heading-link"><i class="fas fa-link"></i></a>未整理</h2>
<p>Ray在系统层面,是一个通用的以task为调度级别的,同时可以针对每个task控制资源粒度的一个通用的分布式task执行系统。记住,在Ray里,你需要明确定义Task以及Task的依赖,并且为每个task指定合适(数量,资源类型)的资源。</p>
<p>应用层面,你可以基于Ray的系统进行编程,因为Ray默认提供了Python的编程接口,所以你可以自己实现增强学习库(RLLib),也可以整合已有的算法框架,比如tensorflow,让tensorflow成为Ray上的一个应用,并且轻松实现分布式。</p>
</div></div></article><article class="postlist-item post"><header class="post-header"><h1 class="post-title"><a class="post-title__link" href="/%E5%88%86%E5%B8%83%E5%BC%8F%E8%AE%A1%E7%AE%97%E6%A1%86%E6%9E%B6/Spark%E5%AD%A6%E4%B9%A0%EF%BC%88%E4%B8%80%EF%BC%89%EF%BC%9ASpark%E5%8E%9F%E7%90%86%E7%AE%80%E8%BF%B0/">Spark学习(一):Spark原理简述</a></h1><div class="post-meta"><span class="post-meta-item post-meta-item--createtime"><span class="post-meta-item__icon"><i class="far fa-calendar-plus"></i></span><span class="post-meta-item__info">发表于</span><span class="post-meta-item__value">2020-09-26</span></span><span class="post-meta-item post-meta-item--updatetime"><span class="post-meta-item__icon"><i class="far fa-calendar-check"></i></span><span class="post-meta-item__info">更新于</span><span class="post-meta-item__value">2020-09-28</span></span></div></header><div class="post-body"><div class="post-excerpt">
<h3 id="Spark是什么?" >
<a href="#Spark是什么?" class="heading-link"><i class="fas fa-link"></i></a>Spark是什么?</h3>
<p> Apache Spark最初由美国加州大学伯克利分校的AMP实验室于2009年开发,是专为大规模数据处理而设计的快速通用的计算引擎,可用于构建大型的,低延迟的数据分析应用程序。</p>
<h3 id="Spark特点:" >
<a href="#Spark特点:" class="heading-link"><i class="fas fa-link"></i></a>Spark特点:</h3>
<ol>
<li>运行速度快:Spark采用DAG执行引擎,以支持循环数据流与内存计算,基于内存的执行速度可比Hadoop MapReduce快上百倍,基于磁盘的速度也能快十倍</li>
<li>容易使用:Spark支持使用Scala、Java、Python和R语言进行编程,简洁的API设计有助于用户轻松构建并行程序,并且可以通过Spark Shell进行交互式编程</li>
<li>通用性:Spark提供了完整而强大的技术栈,包括SQL查询、流式计算、机器学习和图算法组件,这些组件可以无缝整合在同一个应用中,足以应对复杂的计算;</li>
<li>运行模式多样:Spark可运行于独立的集群模式中,或者运行于Hadoop中,也可运行于Amazon EC2等云环境中,并且可以访问HDFS、Cassandra、HBase、Hive等多种数据源。</li>
</ol>
<blockquote>
<p><span class="exturl"><a class="exturl__link" target="_blank" rel="noopener" href="https://blog.csdn.net/qq_17677907/article/details/88685705" >Spark基础教程</a><span class="exturl__icon"><i class="fas fa-external-link-alt"></i></span></span></p>
</blockquote>
<h3 id="Apache-Spark的应用:" >
<a href="#Apache-Spark的应用:" class="heading-link"><i class="fas fa-link"></i></a>Apache Spark的应用:</h3>
<ol>
<li>数据集成:系统生成的数据不够整合,无法结合进行分析。要从系统中获取一致的数据,可以使用提取,转换和加载(ETL)等过程。Spark用于减少此ETL过程所需的成本和时间。</li>
<li>流处理:Spark能够运行数据流并拒绝潜在的欺诈性操作。</li>
<li>机器学习:Spark能够将数据存储在内存中并且可以快速运行重复查询,因此可以轻松处理机器学习算法。</li>
<li>交互式分析:Spark能够快速生成响应。因此可以交互式的处理数据,而不是运行预定义的查询。</li>
</ol>
<blockquote>
<p><span class="exturl"><a class="exturl__link" target="_blank" rel="noopener" href="https://www.yiibai.com/spark/apache-spark-introduction.html" >Spark 简介</a><span class="exturl__icon"><i class="fas fa-external-link-alt"></i></span></span></p>
</blockquote>
<h3 id="Spark分布式计算原理" >
<a href="#Spark分布式计算原理" class="heading-link"><i class="fas fa-link"></i></a>Spark分布式计算原理</h3>
</div></div></article><article class="postlist-item post"><header class="post-header"><h1 class="post-title"><a class="post-title__link" href="/%E6%9D%82%E4%B8%83%E6%9D%82%E5%85%AB%E7%9A%84%E4%B8%9C%E8%A5%BF/hexo%E4%BD%BF%E7%94%A8%E6%95%99%E7%A8%8B/">hexo使用技巧</a></h1><div class="post-meta"><span class="post-meta-item post-meta-item--createtime"><span class="post-meta-item__icon"><i class="far fa-calendar-plus"></i></span><span class="post-meta-item__info">发表于</span><span class="post-meta-item__value">2020-09-26</span></span><span class="post-meta-item post-meta-item--updatetime"><span class="post-meta-item__icon"><i class="far fa-calendar-check"></i></span><span class="post-meta-item__info">更新于</span><span class="post-meta-item__value">2020-09-26</span></span></div></header><div class="post-body"><div class="post-excerpt"><h3 id="Hexo-使用技巧" >
<a href="#Hexo-使用技巧" class="heading-link"><i class="fas fa-link"></i></a>Hexo 使用技巧</h3></div><div class="post-readmore"><a class="post-readmore__link" href="/%E6%9D%82%E4%B8%83%E6%9D%82%E5%85%AB%E7%9A%84%E4%B8%9C%E8%A5%BF/hexo%E4%BD%BF%E7%94%A8%E6%95%99%E7%A8%8B/"><span class="post-readmore__text">阅读全文</span><span class="post-readmore__icon"><i class="fas fa-long-arrow-alt-right"></i></span></a></div></div></article></section><nav class="paginator"><div class="paginator-inner"><span class="page-number current">1</span></div></nav></div></div><div class="sidebar-wrap" id="sidebar-wrap"><aside class="sidebar" id="sidebar"><section class="sidebar-toc hide"></section><!-- ov = overview--><section class="sidebar-ov"><div class="sidebar-ov-author"><div class="sidebar-ov-author__avatar"><img class="sidebar-ov-author__avatar_img" src="/assets/profile.jfif" alt="avatar"></div><p class="sidebar-ov-author__text">书山有路勤为径,学海无涯苦作舟</p></div><div class="sidebar-ov-social"><a class="sidebar-ov-social-item" href="https://github.com/gxxxh" target="_blank" rel="noopener" data-popover="Github" data-popover-pos="up"><span class="sidebar-ov-social-item__icon"><i class="fab fa-github"></i></span></a></div><div class="sidebar-ov-state"><a class="sidebar-ov-state-item sidebar-ov-state-item--posts" href="/archives/"><div class="sidebar-ov-state-item__count">4</div><div class="sidebar-ov-state-item__name">归档</div></a><a class="sidebar-ov-state-item sidebar-ov-state-item--categories" href="/categories/"><div class="sidebar-ov-state-item__count">2</div><div class="sidebar-ov-state-item__name">分类</div></a><a class="sidebar-ov-state-item sidebar-ov-state-item--tags" href="/tags/"><div class="sidebar-ov-state-item__count">2</div><div class="sidebar-ov-state-item__name">标签</div></a></div><div class="sidebar-ov-cc"><a href="https://creativecommons.org/licenses/by-nc-sa/4.0/deed.en" target="_blank" rel="noopener" data-popover="知识共享许可协议" data-popover-pos="up"><img 
src="/images/cc-by-nc-sa.svg" alt="知识共享许可协议"></a></div></section></aside></div><div class="clearfix"></div></div></main><footer class="footer" id="footer"><div class="footer-inner"><div><span>Copyright © 2021</span><span class="footer__devider"></span><span>GXH</span></div></div></footer><div class="loading-bar" id="loading-bar"><div class="loading-bar__progress"></div></div><div class="back2top" id="back2top"><span class="back2top__icon"><i class="fas fa-rocket"></i></span></div></div><script src="https://cdn.jsdelivr.net/npm/[email protected]/dist/jquery.min.js"></script><script src="https://cdn.jsdelivr.net/npm/[email protected]/velocity.min.js"></script><script src="https://cdn.jsdelivr.net/npm/[email protected]/velocity.ui.min.js"></script><script src="/js/utils.js?v=2.1.1"></script><script src="/js/stun-boot.js?v=2.1.1"></script><script src="/js/scroll.js?v=2.1.1"></script><script src="/js/header.js?v=2.1.1"></script><script src="/js/sidebar.js?v=2.1.1"></script></body></html>