{"version":1,"pages":[{"id":"-LkwjQW8eZDOd4Ohi_6l","title":"Introduction","pathname":"/blog","siteSpaceId":"sitesp_FdnJ2","description":""},{"id":"-LpEitAr3Z-3JwftQFWo","title":"Reading list","pathname":"/blog/reading-list","siteSpaceId":"sitesp_FdnJ2","description":""},{"id":"-M-HPK65arxcOs9An7A5","title":"Index","pathname":"/blog/seminal-theory/index","siteSpaceId":"sitesp_FdnJ2","description":"","breadcrumbs":[{"label":"Theory"}]},{"id":"-Lkwjt_VMLyYQLCAFX2s","title":"Impossibility of Distributed Consensus with One Faulty Process","pathname":"/blog/seminal-theory/index/untitled","siteSpaceId":"sitesp_FdnJ2","description":"https://groups.csail.mit.edu/tds/papers/Lynch/jacm85.pdf","breadcrumbs":[{"label":"Theory"},{"label":"Index"}]},{"id":"-Lkwm3_3MW5SR3nWzxqC","title":"Time, Clocks, and the Ordering of Events in a Distributed System","pathname":"/blog/seminal-theory/index/time-clocks-and-the-ordering-of-events-ina-distributed-system","siteSpaceId":"sitesp_FdnJ2","description":"https://amturing.acm.org/p558-lamport.pdf","breadcrumbs":[{"label":"Theory"},{"label":"Index"}]},{"id":"-Lkwmmj_05zg8WUyJ8KL","title":"Using Reasoning About Knowledge to analyze Distributed Systems","pathname":"/blog/seminal-theory/index/using-reasoning-about-knowledge-to-analyze-distributed-systems","siteSpaceId":"sitesp_FdnJ2","description":"https://www.cs.cornell.edu/home/halpern/papers/UsingRAK.pdf","breadcrumbs":[{"label":"Theory"},{"label":"Index"}]},{"id":"-LkwnRZPb3qGyfQ-TXIB","title":"CAP Twelve Years Later: How the “Rules” Have Changed","pathname":"/blog/seminal-theory/index/cap-twelve-years-later-how-the-rules-have-changed","siteSpaceId":"sitesp_FdnJ2","description":"https://scholar.google.com/scholar?cluster=17642052422667212790","breadcrumbs":[{"label":"Theory"},{"label":"Index"}]},{"id":"-LkwnZ1ugKE4TyKGEBfj","title":"A Note on Distributed Computing","pathname":"/blog/seminal-theory/index/a-note-on-distributed-computing","siteSpaceId":"sitesp_FdnJ2","description":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.41.7628","breadcrumbs":[{"label":"Theory"},{"label":"Index"}]},{"id":"-MJDDGfVyS1e1y2j23eF","title":"Index","pathname":"/blog/operating-system/index","siteSpaceId":"sitesp_FdnJ2","description":"","breadcrumbs":[{"label":"Operating System"}]},{"id":"-M-HRHC6CPsqM7RB_G5P","title":"Index","pathname":"/blog/storage/index","siteSpaceId":"sitesp_FdnJ2","description":"","breadcrumbs":[{"label":"Storage"}]},{"id":"-Lylel4T8INxdh7eoPZn","title":"Tachyon: Reliable, Memory Speed Storage for Cluster Computing Frameworks","pathname":"/blog/storage/index/tachyon-reliable-memory-speed-storage-for-cluster-computing-frameworks","siteSpaceId":"sitesp_FdnJ2","description":"https://people.csail.mit.edu/matei/papers/2014/socc_tachyon.pdf","breadcrumbs":[{"label":"Storage"},{"label":"Index"}]},{"id":"-LpB4Q5MnIDGQUR1yLFT","title":"Exploiting Commutativity For Practical Fast Replication","pathname":"/blog/storage/index/exploiting-commutativity-for-practical-fast-replication","siteSpaceId":"sitesp_FdnJ2","description":"https://www.usenix.org/system/files/nsdi19-park.pdf","breadcrumbs":[{"label":"Storage"},{"label":"Index"}]},{"id":"-Ll-aKGUYc_A6UNOcinl","title":"Don’t Settle for Eventual: Scalable Causal Consistency for Wide-Area Storage with COPS","pathname":"/blog/storage/index/dont-settle-for-eventual-scalable-causal-consistency-for-wide-area-storage-with-cops","siteSpaceId":"sitesp_FdnJ2","description":"https://www.cs.cmu.edu/~dga/papers/cops-sosp2011.pdf","breadcrumbs":[{"label":"Storage"},{"label":"Index"}]},{"id":"-Ll-a6MMbRuNBp1CmuV8","title":"Building Consistent Transactions with Inconsistent Replication","pathname":"/blog/storage/index/building-consistent-transactions-with-inconsistent-replication","siteSpaceId":"sitesp_FdnJ2","description":"https://syslab.cs.washington.edu/papers/tapir-tr14.pdf","breadcrumbs":[{"label":"Storage"},{"label":"Index"}]},{"id":"-Ll-_ngir56WDqAZszz-","title":"Managing Update Conflicts in Bayou, a Weakly Connected Replicated Storage System","pathname":"/blog/storage/index/managing-update-conflicts-in-bayou-a-weakly-connected-replicated-storage-system","siteSpaceId":"sitesp_FdnJ2","description":"http://db.cs.berkeley.edu/cs286/papers/bayou-sosp1995.pdf","breadcrumbs":[{"label":"Storage"},{"label":"Index"}]},{"id":"-LuJxBP0LrBJuYnQfCOs","title":"Spanner: Google's Globally-Distributed Database","pathname":"/blog/storage/index/spanner-googles-globally-distributed-database","siteSpaceId":"sitesp_FdnJ2","description":"https://ai.google/research/pubs/pub39966","breadcrumbs":[{"label":"Storage"},{"label":"Index"}]},{"id":"-Ll-_P9da0a3gTXXzs7S","title":"Bigtable: A Distributed Storage System for Structured Data","pathname":"/blog/storage/index/bigtable-a-distributed-storage-system-for-structured-data","siteSpaceId":"sitesp_FdnJ2","description":"https://static.googleusercontent.com/media/research.google.com/en//archive/bigtable-osdi06.pdf","breadcrumbs":[{"label":"Storage"},{"label":"Index"}]},{"id":"-Ll-YNBL11CsoXrPPTBa","title":"The Google File System","pathname":"/blog/storage/index/the-google-file-system","siteSpaceId":"sitesp_FdnJ2","description":"https://static.googleusercontent.com/media/research.google.com/en//archive/gfs-sosp2003.pdf","breadcrumbs":[{"label":"Storage"},{"label":"Index"}]},{"id":"-Ll-Y68xUm05EsTJ1PfN","title":"Dynamo: Amazon’s Highly Available Key-value Store","pathname":"/blog/storage/index/dynamo-amazons-highly-available-key-value-store","siteSpaceId":"sitesp_FdnJ2","description":"https://www.allthingsdistributed.com/files/amazon-dynamo-sosp2007.pdf","breadcrumbs":[{"label":"Storage"},{"label":"Index"}]},{"id":"-Ll-XXuPDnLpyTsGynC6","title":"Chord: A Scalable Peer-to-peer Lookup Service for Internet Applications","pathname":"/blog/storage/index/chord-a-scalable-peer-to-peer-lookup-service-for-internet-applications","siteSpaceId":"sitesp_FdnJ2","description":"https://pdos.csail.mit.edu/papers/chord:sigcomm01/chord_sigcomm.pdf","breadcrumbs":[{"label":"Storage"},{"label":"Index"}]},{"id":"-Ll-XQq2cMWxg6eGWjc0","title":"Replicated Data Consistency Explained Through Baseball","pathname":"/blog/storage/index/replicated-data-consistency-explained-through-baseball","siteSpaceId":"sitesp_FdnJ2","description":"https://www.microsoft.com/en-us/research/wp-content/uploads/2011/10/ConsistencyAndBaseballReport.pdf","breadcrumbs":[{"label":"Storage"},{"label":"Index"}]},{"id":"-Lkwos0Q5sUV9w1ZnvGb","title":"Session Guarantees for Weakly Consistent Replicated Data","pathname":"/blog/storage/index/session-guarantees-for-weakly-consistent-replicated-data","siteSpaceId":"sitesp_FdnJ2","description":"http://www.cs.utexas.edu/~lorenzo/corsi/cs380d/papers/SessionGuaranteesBayou.pdf","breadcrumbs":[{"label":"Storage"},{"label":"Index"}]},{"id":"-LylftqiC2Xjoaubptyo","title":"Flat Datacenter Storage","pathname":"/blog/storage/index/flat-datacenter-storage","siteSpaceId":"sitesp_FdnJ2","description":"https://www.usenix.org/system/files/conference/osdi12/osdi12-final-75.pdf","breadcrumbs":[{"label":"Storage"},{"label":"Index"}]},{"id":"-M1iiX9T37-GgLa799Wg","title":"Small Cache, Big Effect: Provable Load Balancing forRandomly Partitioned Cluster Services","pathname":"/blog/storage/index/small-cache-big-effect-provable-load-balancing-forrandomly-partitioned-cluster-services","siteSpaceId":"sitesp_FdnJ2","description":"https://www.cs.cmu.edu/~fawnproj/papers/loadbal-socc2011.pdf","breadcrumbs":[{"label":"Storage"},{"label":"Index"}]},{"id":"-M1ihQGmiRipxCgbRF59","title":"DistCache: provable load balancing for large-scale storage systems with distributed caching","pathname":"/blog/storage/index/distcache-provable-load-balancing-for-large-scale-storage-systems-with-distributed-caching","siteSpaceId":"sitesp_FdnJ2","description":"https://www.usenix.org/conference/fast19/presentation/liu","breadcrumbs":[{"label":"Storage"},{"label":"Index"}]},{"id":"-M594EuiFoq0VGFDa5aM","title":"Short Summaries","pathname":"/blog/storage/index/short-summaries","siteSpaceId":"sitesp_FdnJ2","description":"","breadcrumbs":[{"label":"Storage"},{"label":"Index"}]},{"id":"-M-HRViov32tjz_xgRuU","title":"Index","pathname":"/blog/coordination/index","siteSpaceId":"sitesp_FdnJ2","description":"","breadcrumbs":[{"label":"Coordination"}]},{"id":"-LkwmIsEEgkLbgygiXwq","title":"Logical Physical Clocks and Consistent Snapshots in Globally Distributed Databases","pathname":"/blog/coordination/index/logical-physical-clocks-and-consistent-snapshots-in-globally-distributed-databases","siteSpaceId":"sitesp_FdnJ2","description":"https://cse.buffalo.edu/tech-reports/2014-04.pdf","breadcrumbs":[{"label":"Coordination"},{"label":"Index"}]},{"id":"-LkwotOpJL8OsrlSNOdP","title":"Paxos made simple","pathname":"/blog/coordination/index/paxos-made-simple","siteSpaceId":"sitesp_FdnJ2","description":"https://www.microsoft.com/en-us/research/uploads/prod/2016/12/paxos-simple-Copy.pdf","breadcrumbs":[{"label":"Coordination"},{"label":"Index"}]},{"id":"-Ll-bAl864v6OfQlXk3p","title":"ZooKeeper: Wait-free coordination for Internet-scale systems","pathname":"/blog/coordination/index/zookeeper-wait-free-coordination-for-internet-scale-systems","siteSpaceId":"sitesp_FdnJ2","description":"https://www.usenix.org/legacy/event/atc10/tech/full_papers/Hunt.pdf","breadcrumbs":[{"label":"Coordination"},{"label":"Index"}]},{"id":"-Ll-bgOLPaviJsFRTP4o","title":"Just Say NO to Paxos Overhead: Replacing Consensus with Network Ordering","pathname":"/blog/coordination/index/just-say-no-to-paxos-overhead-replacing-consensus-with-network-ordering","siteSpaceId":"sitesp_FdnJ2","description":"https://www.usenix.org/system/files/conference/osdi16/osdi16-li.pdf","breadcrumbs":[{"label":"Coordination"},{"label":"Index"}]},{"id":"-Ll-cC_6OU9b8D3evOf3","title":"Keeping CALM: When Distributed Consistency is Easy","pathname":"/blog/coordination/index/keeping-calm-when-distributed-consistency-is-easy","siteSpaceId":"sitesp_FdnJ2","description":"https://arxiv.org/abs/1901.01930","breadcrumbs":[{"label":"Coordination"},{"label":"Index"}]},{"id":"-Ll-caVYpdq5VMec8s_B","title":"In Search of an Understandable Consensus Algorithm","pathname":"/blog/coordination/index/in-search-of-an-understandable-consensus-algorithm","siteSpaceId":"sitesp_FdnJ2","description":"https://web.stanford.edu/~ouster/cgi-bin/papers/raft-atc14","breadcrumbs":[{"label":"Coordination"},{"label":"Index"}]},{"id":"-Ll-crp0rXYes5dLaw7I","title":"A comprehensive study of Convergent and Commutative Replicated Data Types","pathname":"/blog/coordination/index/a-comprehensive-study-of-convergent-and-commutative-replicated-data-types","siteSpaceId":"sitesp_FdnJ2","description":"https://hal.inria.fr/inria-00555588/document","breadcrumbs":[{"label":"Coordination"},{"label":"Index"}]},{"id":"-M-HReOObSXSSFhzPDuy","title":"Index","pathname":"/blog/fault-tolerance/index","siteSpaceId":"sitesp_FdnJ2","description":"","breadcrumbs":[{"label":"Fault Tolerance"}]},{"id":"-MUFS8ha8cRTKArhx3ZF","title":"The Mystery Machine: End-to-end Performance Analysis of Large-scale Internet Services","pathname":"/blog/fault-tolerance/index/the-mystery-machine-end-to-end-performance-analysis-of-large-scale-internet-services","siteSpaceId":"sitesp_FdnJ2","description":"","breadcrumbs":[{"label":"Fault Tolerance"},{"label":"Index"}]},{"id":"-M2HdpqlLpwdbbLbhHi0","title":"Gray Failure: The Achilles’ Heel of Cloud-Scale Systems","pathname":"/blog/fault-tolerance/index/gray-failure","siteSpaceId":"sitesp_FdnJ2","description":"https://www.cs.jhu.edu/~huang/paper/grayfailure-hotos17.pdf","breadcrumbs":[{"label":"Fault Tolerance"},{"label":"Index"}]},{"id":"-M3-aBmULxXKb4PW94HZ","title":"Capturing and Enhancing In Situ System Observability for Failure Detection","pathname":"/blog/fault-tolerance/index/capturing-and-enhancing-in-situ-system-observability-for-failure-detection","siteSpaceId":"sitesp_FdnJ2","description":"https://www.usenix.org/conference/osdi18/presentation/huang","breadcrumbs":[{"label":"Fault Tolerance"},{"label":"Index"}]},{"id":"-M2H_Xl839s4FEzXcBKA","title":"Check before You Change: Preventing Correlated Failures in Service Updates","pathname":"/blog/fault-tolerance/index/check-before-you-change-preventing-correlated-failures-in-service-updates","siteSpaceId":"sitesp_FdnJ2","description":"https://ennanzhai.github.io/pub/nsdi20-cloudcanary.pdf","breadcrumbs":[{"label":"Fault Tolerance"},{"label":"Index"}]},{"id":"-M0fDKjYUEhmHSCtL_la","title":"Efficient Scalable Thread-Safety-Violation Detection","pathname":"/blog/fault-tolerance/index/efficient-scalable-thread-safety-violation-detection","siteSpaceId":"sitesp_FdnJ2","description":"http://people.cs.uchicago.edu/~cstjygpl/sosp19-tsvd.pdf","breadcrumbs":[{"label":"Fault Tolerance"},{"label":"Index"}]},{"id":"-LqA3CUU9qDHMO0jaX5U","title":"REPT: Reverse Debugging of Failures in Deployed Software","pathname":"/blog/fault-tolerance/index/rept-reverse-debugging-of-failures-in-deployed-software","siteSpaceId":"sitesp_FdnJ2","description":"https://www.usenix.org/conference/osdi18/presentation/weidong","breadcrumbs":[{"label":"Fault Tolerance"},{"label":"Index"}]},{"id":"-Lp_XVFHUD7R5kFH2rH5","title":"Redundancy Does Not Imply Fault Tolerance","pathname":"/blog/fault-tolerance/index/redundancy-does-not-imply-fault-tolerance","siteSpaceId":"sitesp_FdnJ2","description":"https://www.usenix.org/system/files/conference/fast17/fast17-ganesan.pdf","breadcrumbs":[{"label":"Fault Tolerance"},{"label":"Index"}]},{"id":"-Ll3SgOep_fPB0GCEt-A","title":"Fixed It For You:Protocol Repair Using Lineage Graphs","pathname":"/blog/fault-tolerance/index/fixed-it-for-you-protocol-repair-using-lineage-graphs","siteSpaceId":"sitesp_FdnJ2","description":"https://people.ucsc.edu/~palvaro/p122-oldenburg-cidr19.pdf","breadcrumbs":[{"label":"Fault Tolerance"},{"label":"Index"}]},{"id":"-Ll-d6jCk1RgkZFMYjMC","title":"The Good, the Bad, and the Differences: Better Network Diagnostics with Differential Provenance","pathname":"/blog/fault-tolerance/index/the-good-the-bad-and-the-differences-better-network-diagnostics-with-differential-provenance","siteSpaceId":"sitesp_FdnJ2","description":"https://www.cs.rice.edu/~angchen/papers/sigcomm-2016.pdf","breadcrumbs":[{"label":"Fault Tolerance"},{"label":"Index"}]},{"id":"-Lkwov3Cw534jbmHW6aI","title":"Lineage-driven Fault Injection","pathname":"/blog/fault-tolerance/index/lineage-driven-fault-injection","siteSpaceId":"sitesp_FdnJ2","description":"https://people.ucsc.edu/~palvaro/molly.pdf","breadcrumbs":[{"label":"Fault Tolerance"},{"label":"Index"}]},{"id":"-M4yl9MnSfHLsmHzewfR","title":"Short Summaries","pathname":"/blog/fault-tolerance/index/short-summaries","siteSpaceId":"sitesp_FdnJ2","description":"","breadcrumbs":[{"label":"Fault Tolerance"},{"label":"Index"}]},{"id":"-LuEiSEMqIjg3H7PiTwJ","title":"Index","pathname":"/blog/cloud-computing/index","siteSpaceId":"sitesp_FdnJ2","description":"","breadcrumbs":[{"label":"Cloud Computing"}]},{"id":"-Ll8LzD9VmtHwXildrzG","title":"Improving MapReduce Performance in Heterogeneous Environments","pathname":"/blog/cloud-computing/index/improving-mapreduce-performance-in-heterogeneous-environments","siteSpaceId":"sitesp_FdnJ2","description":"http://courses.cs.vt.edu/cs5204/fall12-kafura/Papers/MapReduce/Map-Reduce-Hadoop.pdf","breadcrumbs":[{"label":"Cloud Computing"},{"label":"Index"}]},{"id":"-Ll-q_fmR_ZpBstwhyS8","title":"CLARINET: WAN-Aware Optimization for Analytics Queries","pathname":"/blog/cloud-computing/index/clarinet-wan-aware-optimization-for-analytics-queries","siteSpaceId":"sitesp_FdnJ2","description":"https://www.usenix.org/system/files/conference/osdi16/osdi16-viswanathan.pdf","breadcrumbs":[{"label":"Cloud Computing"},{"label":"Index"}]},{"id":"-Lkwox1GWbs0Dffx6LlP","title":"MapReduce: Simplified Data Processing on Large Clusters","pathname":"/blog/cloud-computing/index/mapreduce-simplified-data-processing-on-large-clusters","siteSpaceId":"sitesp_FdnJ2","description":"http://static.googleusercontent.com/media/research.google.com/en//archive/mapreduce-osdi04.pdf","breadcrumbs":[{"label":"Cloud Computing"},{"label":"Index"}]},{"id":"-Ll-qfD7geE7RS-hG4Qc","title":"Dryad: Distributed Data-Parallel Programs from Sequential Building Blocks","pathname":"/blog/cloud-computing/index/dryad-distributed-data-parallel-programs-from-sequentialbuilding-blocks","siteSpaceId":"sitesp_FdnJ2","description":"https://www.microsoft.com/en-us/research/wp-content/uploads/2007/03/eurosys07.pdf","breadcrumbs":[{"label":"Cloud Computing"},{"label":"Index"}]},{"id":"-LxOnbtwIPrhCM0mJwcs","title":"Resource Management","pathname":"/blog/cloud-computing/index/resource-management","siteSpaceId":"sitesp_FdnJ2","description":"Source: https://ucbrise.github.io/cs294-rise-fa16/assets/slides/cluster_management_systems_overview.pdf","breadcrumbs":[{"label":"Cloud Computing"},{"label":"Index"}]},{"id":"-Ll_VjtS3z5UuMEQYNqY","title":"Apache Hadoop YARN: Yet Another Resource Negotiator","pathname":"/blog/cloud-computing/index/apache-hadoop-yarn-yet-another-resource-negotiator","siteSpaceId":"sitesp_FdnJ2","description":"https://www.cse.ust.hk/~weiwa/teaching/Fall15-COMP6611B/reading_list/YARN.pdf","breadcrumbs":[{"label":"Cloud Computing"},{"label":"Index"}]},{"id":"-LxOnfFWTsU7-06Fw24h","title":"Mesos: A Platform for Fine-Grained Resource Sharing in the Data Center","pathname":"/blog/cloud-computing/index/mesos-a-platform-for-fine-grained-resource-sharing-in-the-data-center","siteSpaceId":"sitesp_FdnJ2","description":"https://people.eecs.berkeley.edu/~alig/papers/mesos.pdf","breadcrumbs":[{"label":"Cloud Computing"},{"label":"Index"}]},{"id":"-LyWrUhlsr62pqhEYqvC","title":"Dominant Resource Fairness: Fair Allocation of Multiple Resource Types","pathname":"/blog/cloud-computing/index/dominant-resource-fairness-fair-allocation-of-multiple-resource-types","siteSpaceId":"sitesp_FdnJ2","description":"https://cs.stanford.edu/~matei/papers/2011/nsdi_drf.pdf","breadcrumbs":[{"label":"Cloud Computing"},{"label":"Index"}]},{"id":"-LxOnnqm9ydZV_H-5YiO","title":"Large-scale cluster management at Google with Borg","pathname":"/blog/cloud-computing/index/large-scale-cluster-management-at-google-with-borg","siteSpaceId":"sitesp_FdnJ2","description":"https://pdos.csail.mit.edu/6.824/papers/borg.pdf","breadcrumbs":[{"label":"Cloud Computing"},{"label":"Index"}]},{"id":"-Lkz4Uwn-c_8bh7cPFEe","title":"MapReduce Online","pathname":"/blog/cloud-computing/index/mapreduce-online","siteSpaceId":"sitesp_FdnJ2","description":"http://www.neilconway.org/docs/nsdi2010_hop.pdf","breadcrumbs":[{"label":"Cloud Computing"},{"label":"Index"}]},{"id":"-Ll-dPcXTJAlRDnFM2tR","title":"Delay Scheduling: A Simple Technique for Achieving Locality and Fairness in Cluster Scheduling","pathname":"/blog/cloud-computing/index/delay-scheduling-a-simple-technique-for-achieving-locality-and-fairness-in-cluster-scheduling","siteSpaceId":"sitesp_FdnJ2","description":"http://elmeleegy.com/khaled/papers/delay_scheduling.pdf","breadcrumbs":[{"label":"Cloud Computing"},{"label":"Index"}]},{"id":"-LlAwEK1qZqTEyqGYeC3","title":"Reining in the Outliers in Map-Reduce Clusters using Mantri","pathname":"/blog/cloud-computing/index/reining-in-the-outliers-in-map-reduce-clusters-using-mantri","siteSpaceId":"sitesp_FdnJ2","description":"https://www.usenix.org/legacy/events/osdi10/tech/full_papers/Ananthanarayanan.pdf","breadcrumbs":[{"label":"Cloud Computing"},{"label":"Index"}]},{"id":"-Ll5YODXuOETfmYJuK0J","title":"Effective Straggler Mitigation: Attack of the Clones","pathname":"/blog/cloud-computing/index/effective-straggler-mitigation-attack-of-the-clones","siteSpaceId":"sitesp_FdnJ2","description":"https://www.usenix.org/system/files/conference/nsdi13/nsdi13-final231.pdf","breadcrumbs":[{"label":"Cloud Computing"},{"label":"Index"}]},{"id":"-Lkz5Napj8P0CZW7Syey","title":"Resilient Distributed Datasets: A Fault-Tolerant Abstraction for In-Memory Cluster Computing","pathname":"/blog/cloud-computing/index/resilient-distributed-datasets-a-fault-tolerant-abstraction-for-in-memory-cluster-computing","siteSpaceId":"sitesp_FdnJ2","description":"https://www.usenix.org/system/files/conference/nsdi12/nsdi12-final138.pdf","breadcrumbs":[{"label":"Cloud Computing"},{"label":"Index"}]},{"id":"-Lkz71TYXNbhpA-cK6uC","title":"Discretized Streams: Fault-Tolerant Streaming Computation at Scale","pathname":"/blog/cloud-computing/index/discretized-streams-fault-tolerant-streaming-computation-at-scale","siteSpaceId":"sitesp_FdnJ2","description":"https://people.csail.mit.edu/matei/papers/2013/sosp_spark_streaming.pdf","breadcrumbs":[{"label":"Cloud Computing"},{"label":"Index"}]},{"id":"-Ll-pdSnCymaMrVaGiur","title":"Sparrow: Distributed, Low Latency Scheduling","pathname":"/blog/cloud-computing/index/sparrow-distributed-low-latency-scheduling","siteSpaceId":"sitesp_FdnJ2","description":"https://cs.stanford.edu/~matei/papers/2013/sosp_sparrow.pdf","breadcrumbs":[{"label":"Cloud Computing"},{"label":"Index"}]},{"id":"-LluOjrDpto7tj6hrNaf","title":"Making Sense of Performance in Data Analytics Framework","pathname":"/blog/cloud-computing/index/making-sense-of-performance-in-data-analytics-framework","siteSpaceId":"sitesp_FdnJ2","description":"https://www.usenix.org/system/files/conference/nsdi15/nsdi15-paper-ousterhout.pdf","breadcrumbs":[{"label":"Cloud Computing"},{"label":"Index"}]},{"id":"-Ll-qkADtQqaceNV9qUd","title":"Monotasks: Architecting for Performance Clarity in Data Analytics Frameworks","pathname":"/blog/cloud-computing/index/monotasks-architecting-for-performance-clarity-in-data-analytics-frameworks","siteSpaceId":"sitesp_FdnJ2","description":"http://kayousterhout.org/publications/sosp17-final183.pdf","breadcrumbs":[{"label":"Cloud Computing"},{"label":"Index"}]},{"id":"-Ll-dkOsIN5__8SUNLm9","title":"Drizzle: Fast and Adaptable Stream Processing at Scale","pathname":"/blog/cloud-computing/index/drizzle-fast-and-adaptable-stream-processing-at-scale","siteSpaceId":"sitesp_FdnJ2","description":"http://shivaram.org/publications/drizzle-sosp17.pdf","breadcrumbs":[{"label":"Cloud Computing"},{"label":"Index"}]},{"id":"-LnGy0IwOGMTLCiteU9G","title":"Naiad: A Timely Dataflow System","pathname":"/blog/cloud-computing/index/naiad-a-timely-dataflow-system","siteSpaceId":"sitesp_FdnJ2","description":"http://sigops.org/s/conferences/sosp/2013/papers/p439-murray.pdf","breadcrumbs":[{"label":"Cloud Computing"},{"label":"Index"}]},{"id":"-LnxUO6uFPXDrO8Wr8vt","title":"The Dataflow Model:A Practical Approach to Balancing Correctness, Latency, and Cost in Massive-Scale","pathname":"/blog/cloud-computing/index/the-dataflow-model-a-practical-approach-to-balancing-correctness-latency-and-cost-in-massive-scale","siteSpaceId":"sitesp_FdnJ2","description":"https://www.vldb.org/pvldb/vol8/p1792-Akidau.pdf","breadcrumbs":[{"label":"Cloud Computing"},{"label":"Index"}]},{"id":"-LoqdrnDJ9NTdEcqRst4","title":"Interruptible Tasks:Treating Memory Pressure AsInterrupts for Highly Scalable Data-Parallel Program","pathname":"/blog/cloud-computing/index/interruptible-tasks-treating-memory-pressure-asinterrupts-for-highly-scalable-data-parallel-progra","siteSpaceId":"sitesp_FdnJ2","description":"https://people.cs.uchicago.edu/~shanlu/paper/sosp15-itask.pdf","breadcrumbs":[{"label":"Cloud Computing"},{"label":"Index"}]},{"id":"-Lwo_dSGGd-Em8CgbI8_","title":"PACMan: Coordinated Memory Caching for Parallel Jobs","pathname":"/blog/cloud-computing/index/pacman-coordinated-memory-caching-for-parallel-jobs","siteSpaceId":"sitesp_FdnJ2","description":"https://www.usenix.org/conference/nsdi12/technical-sessions/presentation/ananthanarayanan","breadcrumbs":[{"label":"Cloud Computing"},{"label":"Index"}]},{"id":"-LxZEF-AYu7AUyfB3A7A","title":"Multi-Resource Packing for Cluster Schedulers","pathname":"/blog/cloud-computing/index/multi-resource-packing-for-cluster-schedulers","siteSpaceId":"sitesp_FdnJ2","description":"https://www.cs.cmu.edu/~xia/resources/Documents/grandl_sigcomm14.pdf","breadcrumbs":[{"label":"Cloud Computing"},{"label":"Index"}]},{"id":"-LzTsPaqzvH8HUeCqC-q","title":"Other interesting papers","pathname":"/blog/cloud-computing/index/other-interesting-papers","siteSpaceId":"sitesp_FdnJ2","description":"","breadcrumbs":[{"label":"Cloud Computing"},{"label":"Index"}]},{"id":"-Lpa6jnz6TmROOkmieVI","title":"Index","pathname":"/blog/ml-system/sys-ml-index","siteSpaceId":"sitesp_FdnJ2","description":"","breadcrumbs":[{"label":"Systems for ML"}]},{"id":"-LmgktNAvzRvNEc10Cmd","title":"A Berkeley View of Systems Challenges for AI","pathname":"/blog/ml-system/sys-ml-index/a-berkeley-view-of-systems-challenges-for-ai","siteSpaceId":"sitesp_FdnJ2","description":"https://arxiv.org/pdf/1712.05855.pdf","breadcrumbs":[{"label":"Systems for ML"},{"label":"Index"}]},{"id":"-M3hYfFGS944i2U751ma","title":"Tiresias: A GPU Cluster Managerfor Distributed Deep Learning","pathname":"/blog/ml-system/sys-ml-index/tiresias-a-gpu-cluster-managerfor-distributed-deep-learning","siteSpaceId":"sitesp_FdnJ2","description":"https://www.usenix.org/system/files/nsdi19-gu.pdf","breadcrumbs":[{"label":"Systems for ML"},{"label":"Index"}]},{"id":"-M3hYzTW1EyA2OBdJ8r5","title":"Gandiva: Introspective Cluster Scheduling for Deep Learning","pathname":"/blog/ml-system/sys-ml-index/gandiva-introspective-cluster-scheduling-for-deep-learning","siteSpaceId":"sitesp_FdnJ2","description":"https://www.usenix.org/conference/osdi18/presentation/xiao","breadcrumbs":[{"label":"Systems for ML"},{"label":"Index"}]},{"id":"-M1MN4WlrKcGw2hdG5pr","title":"Workshop papers","pathname":"/blog/ml-system/sys-ml-index/workshop-papers","siteSpaceId":"sitesp_FdnJ2","description":"","breadcrumbs":[{"label":"Systems for ML"},{"label":"Index"}]},{"id":"-LsckeM4tN8fFWt46Jzo","title":"Hidden Technical Debt in Machine Learning Systems","pathname":"/blog/ml-system/sys-ml-index/hidden-technical-debt-in-machine-learning-systems","siteSpaceId":"sitesp_FdnJ2","description":"","breadcrumbs":[{"label":"Systems for ML"},{"label":"Index"}]},{"id":"-LsovxKKToSzLn7YZUSY","title":"Inference Systems","pathname":"/blog/ml-system/sys-ml-index/inference","siteSpaceId":"sitesp_FdnJ2","description":"","breadcrumbs":[{"label":"Systems for ML"},{"label":"Index"}]},{"id":"-Ls-g_19KJBK77xg8sXs","title":"Parameter Servers and AllReduce","pathname":"/blog/ml-system/sys-ml-index/parameter-servers","siteSpaceId":"sitesp_FdnJ2","description":"","breadcrumbs":[{"label":"Systems for ML"},{"label":"Index"}]},{"id":"-LnxRJePSisZbOlUD-QA","title":"Federated Learning at Scale - Part I","pathname":"/blog/ml-system/sys-ml-index/towards-federated-learning-at-scale-system-design","siteSpaceId":"sitesp_FdnJ2","description":"","breadcrumbs":[{"label":"Systems for ML"},{"label":"Index"}]},{"id":"-LxhSTV3fB3JFEcOqAsI","title":"Federated Learning at Scale - Part II","pathname":"/blog/ml-system/sys-ml-index/federated-learning-at-scale-part-i","siteSpaceId":"sitesp_FdnJ2","description":"","breadcrumbs":[{"label":"Systems for ML"},{"label":"Index"}]},{"id":"-LuOMmZZ2NrktjWHC7ww","title":"Learning From Non-IID data","pathname":"/blog/ml-system/sys-ml-index/learning-from-non-iid-data","siteSpaceId":"sitesp_FdnJ2","description":"","breadcrumbs":[{"label":"Systems for ML"},{"label":"Index"}]},{"id":"-LmmJvD-UbdLCF1MAe7t","title":"Ray: A Distributed Framework for Emerging AI Applications","pathname":"/blog/ml-system/sys-ml-index/ray-a-distributed-framework-for-emerging-ai-applications","siteSpaceId":"sitesp_FdnJ2","description":"https://www.usenix.org/system/files/osdi18-moritz.pdf","breadcrumbs":[{"label":"Systems for ML"},{"label":"Index"}]},{"id":"-M-IXos0Dsd7jTkeHSwA","title":"PipeDream: Generalized Pipeline Parallelism for DNN Training","pathname":"/blog/ml-system/sys-ml-index/pipedream-generalized-pipeline-parallelism-for-dnn-training","siteSpaceId":"sitesp_FdnJ2","description":"https://cs.stanford.edu/~matei/papers/2019/sosp_pipedream.pdf","breadcrumbs":[{"label":"Systems for ML"},{"label":"Index"}]},{"id":"-M2toHhPkuEiaQDtUlYE","title":"DeepXplore: Automated Whitebox Testingof Deep Learning Systems","pathname":"/blog/ml-system/sys-ml-index/deepxplore-automated-whitebox-testingof-deep-learning-systems","siteSpaceId":"sitesp_FdnJ2","description":"http://www.cs.columbia.edu/~junfeng/papers/deepxplore-sosp17.pdf","breadcrumbs":[{"label":"Systems for ML"},{"label":"Index"}]},{"id":"-LuU52BycH6KGRMuW8IX","title":"Distributed Machine Learning Misc.","pathname":"/blog/ml-system/sys-ml-index/misc-1","siteSpaceId":"sitesp_FdnJ2","description":"Short summaries","breadcrumbs":[{"label":"Systems for ML"},{"label":"Index"}]},{"id":"-LsJ8R8mCaS1xVqbg8fG","title":"Index","pathname":"/blog/ml-for-systems/ml-sys-index","siteSpaceId":"sitesp_FdnJ2","description":"","breadcrumbs":[{"label":"ML for Systems"}]},{"id":"-M4WUgOc3A9zFwsr9K90","title":"Short Summaries","pathname":"/blog/ml-for-systems/ml-sys-index/short-summaries","siteSpaceId":"sitesp_FdnJ2","description":"","breadcrumbs":[{"label":"ML for Systems"},{"label":"Index"}]},{"id":"-LvM_WgIJBWhaI_jjso-","title":"Index","pathname":"/blog/machine-learning/untitled","siteSpaceId":"sitesp_FdnJ2","description":"","breadcrumbs":[{"label":"Machine Learning"}]},{"id":"-LyRtUdPxEgCV_gwq7ni","title":"Deep Learning with Differential Privacy","pathname":"/blog/machine-learning/untitled/dl-fl-with-differential-privacy","siteSpaceId":"sitesp_FdnJ2","description":"https://arxiv.org/pdf/1607.00133.pdf","breadcrumbs":[{"label":"Machine Learning"},{"label":"Index"}]},{"id":"-LvIoA2GSxaUutbOVY_w","title":"Accelerating Deep Learning via Importance Sampling","pathname":"/blog/machine-learning/untitled/accelerating-deep-learning-by-focusing-on-the-biggest-losers","siteSpaceId":"sitesp_FdnJ2","description":"https://arxiv.org/abs/1910.00762","breadcrumbs":[{"label":"Machine Learning"},{"label":"Index"}]},{"id":"-LrMOe7SbqOHtOYY3wl_","title":"A Few Useful Things to Know About Machine Learning","pathname":"/blog/machine-learning/untitled/ml","siteSpaceId":"sitesp_FdnJ2","description":"https://homes.cs.washington.edu/~pedrod/papers/cacm12.pdf","breadcrumbs":[{"label":"Machine Learning"},{"label":"Index"}]},{"id":"-M9621i6pmLpQzlE_uSv","title":"Index","pathname":"/blog/video/index","siteSpaceId":"sitesp_FdnJ2","description":"","breadcrumbs":[{"label":"Video Analytics"}]},{"id":"-LupSzl2qi1si98W4hpx","title":"Scaling Video Analytics on Constrained Edge Nodes","pathname":"/blog/video/index/untitled","siteSpaceId":"sitesp_FdnJ2","description":"https://arxiv.org/pdf/1905.13536.pdf","breadcrumbs":[{"label":"Video Analytics"},{"label":"Index"}]},{"id":"-LvNd1wk35C7qjT3WgUL","title":"Focus: Querying Large Video Datasets with Low Latency and Low Cost","pathname":"/blog/video/index/focus-querying-large-video-datasetswith-low-latency-and-low-cost","siteSpaceId":"sitesp_FdnJ2","description":"https://www.usenix.org/system/files/osdi18-hsieh.pdf","breadcrumbs":[{"label":"Video Analytics"},{"label":"Index"}]},{"id":"-M-HSBRdl7JQ6hrpONFH","title":"NoScope: Optimizing Neural Network Queriesover Video at Scale","pathname":"/blog/video/index/noscope-optimizing-neural-network-queriesover-video-at-scale","siteSpaceId":"sitesp_FdnJ2","description":"http://www.vldb.org/pvldb/vol10/p1586-kang.pdf","breadcrumbs":[{"label":"Video Analytics"},{"label":"Index"}]},{"id":"-M0j9WPMD92kTYbLc2uZ","title":"Live Video Analytics at Scale with Approximation and Delay-Tolerance","pathname":"/blog/video/index/live-video-analytics-at-scale-with-approximation-and-delay-tolerance","siteSpaceId":"sitesp_FdnJ2","description":"https://www.usenix.org/system/files/conference/nsdi17/nsdi17-zhang.pdf","breadcrumbs":[{"label":"Video Analytics"},{"label":"Index"}]},{"id":"-M-HSHhlpcOyy3Ih0Bft","title":"Chameleon: Scalable Adaptation of Video Analytics","pathname":"/blog/video/index/chameleon-scalable-adaptation-of-video-analytics","siteSpaceId":"sitesp_FdnJ2","description":"https://people.cs.uchicago.edu/~junchenj/docs/Chameleon_SIGCOMM_CameraReady_faceblurred.pdf","breadcrumbs":[{"label":"Video Analytics"},{"label":"Index"}]},{"id":"-MEzKKEgSwb3ZSXjoEyp","title":"End-to-end Learning of Action Detection from Frame Glimpses in Videos","pathname":"/blog/video/index/end-to-end-learning-of-action-detection-from-frame-glimpses-in-videos","siteSpaceId":"sitesp_FdnJ2","description":"https://arxiv.org/pdf/1511.06984.pdf","breadcrumbs":[{"label":"Video Analytics"},{"label":"Index"}]},{"id":"-MGOfKnnVD5iKPvDzmJR","title":"Short Summaries","pathname":"/blog/video/index/short-summaries","siteSpaceId":"sitesp_FdnJ2","description":"","breadcrumbs":[{"label":"Video Analytics"},{"label":"Index"}]},{"id":"-MJDHbCsOQ6_cjnPklh0","title":"Index","pathname":"/blog/networking/index","siteSpaceId":"sitesp_FdnJ2","description":"","breadcrumbs":[{"label":"Networking"}]},{"id":"-M9EkskM_ihl6qdk1ikm","title":"Salsify: Low-Latency Network Video through Tighter Integration between a Video Codec and a Transport","pathname":"/blog/networking/index/salsify-low-latency-network-videothrough-tighter-integration-between-a-videocodec-and-a-transport","siteSpaceId":"sitesp_FdnJ2","description":"https://cs.stanford.edu/~keithw/salsify-paper.pdf","breadcrumbs":[{"label":"Networking"},{"label":"Index"}]},{"id":"-M3P-lT0EZJfSWy_Hi6C","title":"Learning in situ: a randomized experiment in video streaming","pathname":"/blog/networking/index/learning-in-situ-a-randomized-experiment-in-video-streaming","siteSpaceId":"sitesp_FdnJ2","description":"https://www.usenix.org/system/files/nsdi20-paper-yan.pdf","breadcrumbs":[{"label":"Networking"},{"label":"Index"}]},{"id":"-MJDISpSI4us0oXSRJeI","title":"Short Summaries","pathname":"/blog/networking/index/short-summaries","siteSpaceId":"sitesp_FdnJ2","description":"","breadcrumbs":[{"label":"Networking"},{"label":"Index"}]},{"id":"-MJDFYQ1BBavTSLXuUDQ","title":"Index","pathname":"/blog/serverless/index","siteSpaceId":"sitesp_FdnJ2","description":"","breadcrumbs":[{"label":"Serverless"}]},{"id":"-LpeQyWkLpYj2ws4e9J7","title":"Serverless Computing: One Step Forward, Two Steps Back","pathname":"/blog/serverless/index/serverless-computing-one-step-forward-two-steps-back","siteSpaceId":"sitesp_FdnJ2","description":"http://cidrdb.org/cidr2019/papers/p119-hellerstein-cidr19.pdf","breadcrumbs":[{"label":"Serverless"},{"label":"Index"}]},{"id":"-M962KrbTNw-PiGGhiVz","title":"Encoding, Fast and Slow: Low-Latency Video Processing Using Thousands of Tiny Threads","pathname":"/blog/serverless/index/untitled-1","siteSpaceId":"sitesp_FdnJ2","description":"","breadcrumbs":[{"label":"Serverless"},{"label":"Index"}]},{"id":"-MUf4u17JG2r2VW8VdoN","title":"SAND: Towards High-Performance Serverless Computing","pathname":"/blog/serverless/index/sand-towards-high-performance-serverless-computing","siteSpaceId":"sitesp_FdnJ2","description":"","breadcrumbs":[{"label":"Serverless"},{"label":"Index"}]},{"id":"-MUpZnqLS44CvpTohqfL","title":"Pocket: Elastic Ephemeral Storage for Serverless Analytics","pathname":"/blog/serverless/index/pocket-elastic-ephemeral-storage-for-serverless-analytics","siteSpaceId":"sitesp_FdnJ2","description":"https://www.usenix.org/conference/osdi18/presentation/klimovic","breadcrumbs":[{"label":"Serverless"},{"label":"Index"}]},{"id":"-MUgNaxA1lEdQNJYERsQ","title":"Fault-tolerant and Transactional Stateful Serverless Workflows","pathname":"/blog/serverless/index/fault-tolerant-and-transactional-stateful-serverless-workflows","siteSpaceId":"sitesp_FdnJ2","description":"","breadcrumbs":[{"label":"Serverless"},{"label":"Index"}]},{"id":"-MYJCiRpBYtmbjfzUY1Y","title":"Index","pathname":"/blog/resource-disaggregation/index","siteSpaceId":"sitesp_FdnJ2","description":"","breadcrumbs":[{"label":"Resource Disaggregation"}]},{"id":"-MLE2WptpGuFP9dwscAe","title":"Index","pathname":"/blog/edge-computing/index","siteSpaceId":"sitesp_FdnJ2","description":"","breadcrumbs":[{"label":"Edge Computing"}]},{"id":"-M0kaf-SrjsFv7RJOS31","title":"Index","pathname":"/blog/security-privacy/untitled","siteSpaceId":"sitesp_FdnJ2","description":"","breadcrumbs":[{"label":"Security/Privacy"}]},{"id":"-M3-23lxhDnZcxuyAhTU","title":"Differential Privacy","pathname":"/blog/security-privacy/untitled/differential-privacy","siteSpaceId":"sitesp_FdnJ2","description":"","breadcrumbs":[{"label":"Security/Privacy"},{"label":"Index"}]},{"id":"-M1mczpR_g2agQ6m1qV7","title":"Honeycrisp: Large-Scale Differentially Private Aggregation Without a Trusted Core","pathname":"/blog/security-privacy/untitled/honeycrisp-large-scale-differentially-private-aggregation-without-a-trusted-core","siteSpaceId":"sitesp_FdnJ2","description":"https://www.cis.upenn.edu/~ahae/papers/honeycrisp-sosp2019.pdf","breadcrumbs":[{"label":"Security/Privacy"},{"label":"Index"}]},{"id":"-M58qhF_q7aF7-SZ-74V","title":"Short Summaries","pathname":"/blog/security-privacy/untitled/short-summaries","siteSpaceId":"sitesp_FdnJ2","description":"","breadcrumbs":[{"label":"Security/Privacy"},{"label":"Index"}]},{"id":"-M3tWw4HDvcsVhtdvbit","title":"Index","pathname":"/blog/misc/index","siteSpaceId":"sitesp_FdnJ2","description":"","breadcrumbs":[{"label":"Misc."}]},{"id":"-Lq3prYBBn4mye1ndy8_","title":"Rate Limiting","pathname":"/blog/misc/index/rate-limiting","siteSpaceId":"sitesp_FdnJ2","description":"","breadcrumbs":[{"label":"Misc."},{"label":"Index"}]},{"id":"-M1iUCsgQlC7pYQroj2d","title":"Load Balancing","pathname":"/blog/misc/index/load-balancing","siteSpaceId":"sitesp_FdnJ2","description":"","breadcrumbs":[{"label":"Misc."},{"label":"Index"}]},{"id":"-LpHFNjkrdA3EbF6q7er","title":"Consistency Models in Distributed System","pathname":"/blog/misc/index/consistency-models-in-distributed-system","siteSpaceId":"sitesp_FdnJ2","description":"","breadcrumbs":[{"label":"Misc."},{"label":"Index"}]},{"id":"-LsZrZyUru6eKKso6p4j","title":"Managing Complexity","pathname":"/blog/misc/index/complexity","siteSpaceId":"sitesp_FdnJ2","description":"https://www.sciencedirect.com/book/9780123749574/principles-of-computer-system-design","breadcrumbs":[{"label":"Misc."},{"label":"Index"}]},{"id":"-Lsi9ckH5bKNa9MObQi-","title":"System Design","pathname":"/blog/misc/index/system-design","siteSpaceId":"sitesp_FdnJ2","description":"A list of talks/blog posts/papers/books which may be useful to learn about System Design.","breadcrumbs":[{"label":"Misc."},{"label":"Index"}]},{"id":"-Lx89mmHazT0VDERp34T","title":"Deep Dive into the Spark Scheduler","pathname":"/blog/misc/index/deep-dive-into-the-spark-scheduler","siteSpaceId":"sitesp_FdnJ2","description":"","breadcrumbs":[{"label":"Misc."},{"label":"Index"}]},{"id":"-LqykyqfJovh-8u9cA0W","title":"The Actor Model","pathname":"/blog/misc/index/actor-model","siteSpaceId":"sitesp_FdnJ2","description":"","breadcrumbs":[{"label":"Misc."},{"label":"Index"}]},{"id":"-M34MFHxX08KQ6cf7fiw","title":"Python Global Interpreter Lock","pathname":"/blog/misc/index/python-global-interpreter-lock","siteSpaceId":"sitesp_FdnJ2","description":"https://realpython.com/python-gil/","breadcrumbs":[{"label":"Misc."},{"label":"Index"}]},{"id":"-LsZsG5DwPKOA87kfLke","title":"About Research and PhD","pathname":"/blog/misc/index/about-research-and-phd","siteSpaceId":"sitesp_FdnJ2","description":"","breadcrumbs":[{"label":"Misc."},{"label":"Index"}]}]}