# lru_cache.py


#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2015 clowwindy
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.

from __future__ import absolute_import, division, print_function, \
    with_statement

import collections
import logging
import time

try:
    from collections.abc import MutableMapping
except ImportError:  # Python 2: the ABCs live directly in ``collections``
    from collections import MutableMapping

# this LRUCache is optimized for concurrency, not QPS (queries per second)
# n: concurrency, keys stored in the cache
# m: visits not timed out, proportional to QPS * timeout
# get & set is O(1), not O(n). thus we can support very large n
# TODO: if timeout or QPS is too large, then this cache is not very efficient,
#       as sweep() causes long pause

class LRUCache(MutableMapping):
    """Mapping whose entries expire once unvisited for ``timeout`` seconds.

    Every successful read and every write records a visit time for the key.
    Nothing is evicted automatically: call :meth:`sweep` to drop the entries
    whose most recent visit is older than ``timeout``.

    This class is not thread safe.

    Internal bookkeeping:
        _store:             key -> value
        _keys_to_last_time: key -> time of the most recent visit
        _time_to_keys:      visit time -> list of keys visited at that time
        _last_visits:       visit times, in the order they were recorded
        _closed_values:     values already handed to ``close_callback``, so a
                            value stored under several keys is closed once
    """

    def __init__(self, timeout=60, close_callback=None, *args, **kwargs):
        """Create the cache.

        Args:
            timeout: idle time in seconds after which ``sweep()`` evicts an
                entry.
            close_callback: optional callable invoked by ``sweep()`` with the
                value of each entry about to be evicted. Values must be
                hashable when a callback is given, because the values
                already closed are tracked in a set.
            *args, **kwargs: forwarded to ``dict()`` to build the initial
                contents of the cache.
        """
        self.timeout = timeout
        self.close_callback = close_callback
        self._store = {}
        self._time_to_keys = collections.defaultdict(list)
        self._keys_to_last_time = {}
        self._last_visits = collections.deque()
        self._closed_values = set()
        # Use the inherited update() so the initial items go through
        # __setitem__ and get a visit time like any other entry.
        self.update(dict(*args, **kwargs))

    def _touch(self, key):
        """Record a visit to ``key`` at the current time."""
        t = time.time()
        self._keys_to_last_time[key] = t
        self._time_to_keys[t].append(key)
        self._last_visits.append(t)

    def _is_expired(self, key, now):
        """Return True if ``key`` is stored and its last visit timed out."""
        return (key in self._store and
                now - self._keys_to_last_time[key] > self.timeout)

    def __getitem__(self, key):
        # O(1)
        # Look the value up first: a miss must raise KeyError without leaving
        # bookkeeping behind for a key that is not stored (sweep() would
        # never remove such an entry from _keys_to_last_time).
        value = self._store[key]
        self._touch(key)
        return value

    def __setitem__(self, key, value):
        # O(1)
        self._store[key] = value
        self._touch(key)

    def __delitem__(self, key):
        # O(1)
        # Stale references left in _time_to_keys / _last_visits are skipped
        # and discarded by sweep().
        del self._store[key]
        del self._keys_to_last_time[key]

    def __iter__(self):
        return iter(self._store)

    def __len__(self):
        return len(self._store)

    def sweep(self):
        """Evict every entry whose most recent visit is older than timeout.

        If a ``close_callback`` was given it is called with the value of each
        entry about to be evicted, at most once per distinct value.
        """
        # O(m)
        now = time.time()
        swept = 0
        while self._last_visits:
            least = self._last_visits[0]
            if now - least <= self.timeout:
                # Visits are queued in the order they happened, so the
                # remaining ones have not timed out either.
                break
            keys = self._time_to_keys[least]
            if self.close_callback is not None:
                # A key may have been visited again after ``least``; only its
                # most recent visit decides whether it has expired.
                for key in keys:
                    if self._is_expired(key, now):
                        value = self._store[key]
                        if value not in self._closed_values:
                            self.close_callback(value)
                            self._closed_values.add(value)
            self._last_visits.popleft()
            for key in keys:
                if self._is_expired(key, now):
                    del self._store[key]
                    del self._keys_to_last_time[key]
                    swept += 1
            del self._time_to_keys[least]
        if swept:
            self._closed_values.clear()
            logging.debug('%d keys swept', swept)

def test():
    """Self-test: timeout-based eviction and the close callback."""
    global close_cb_called

    cache = LRUCache(timeout=0.3)

    # A stored key can be read back immediately ...
    cache['a'] = 1
    assert cache['a'] == 1

    # ... and is gone once it has been idle for longer than the timeout.
    time.sleep(0.5)
    cache.sweep()
    assert 'a' not in cache

    # Entries younger than the timeout survive a sweep.
    cache['a'] = 2
    cache['b'] = 3
    time.sleep(0.2)
    cache.sweep()
    assert cache['a'] == 2
    assert cache['b'] == 3

    # Reading 'b' refreshes it, so only 'a' times out.
    time.sleep(0.2)
    cache.sweep()
    cache['b']
    time.sleep(0.2)
    cache.sweep()
    assert 'a' not in cache
    assert cache['b'] == 3

    # Eventually both keys have expired.
    time.sleep(0.5)
    cache.sweep()
    assert 'a' not in cache
    assert 'b' not in cache

    close_cb_called = False

    def close_cb(value):
        # The argument is unused; the callback only checks that it is
        # invoked at most once.
        global close_cb_called
        assert not close_cb_called
        close_cb_called = True

    # A key visited several times must be closed only once when swept.
    cache = LRUCache(timeout=0.1, close_callback=close_cb)
    cache['s'] = 1
    cache['s']
    time.sleep(0.1)
    cache['s']
    time.sleep(0.3)
    cache.sweep()

# Run the self-test when this file is executed as a script.
if __name__ == '__main__':
    test()



# (Stray web-page text, not part of the module: "Post a comment".)