{"id":6409,"date":"2025-10-06T15:54:03","date_gmt":"2025-10-06T07:54:03","guid":{"rendered":"http:\/\/cnliutz.ipyingshe.net\/?p=6409"},"modified":"2025-10-06T16:01:36","modified_gmt":"2025-10-06T08:01:36","slug":"apache2%e6%97%a5%e5%bf%97%e5%88%86%e6%9e%90%e7%ae%80%e5%8d%95%e7%89%88python%e4%bb%a3%e7%a0%81","status":"publish","type":"post","link":"http:\/\/g1n29wqq.ipyingshe.net:5347\/?p=6409","title":{"rendered":"apache2\u65e5\u5fd7\u5206\u6790\u7b80\u5355\u7248python\u4ee3\u7801"},"content":{"rendered":"\n<pre class=\"wp-block-code\"><code>import re\nimport argparse\nfrom collections import Counter, defaultdict\nfrom datetime import datetime\nimport matplotlib\nimport matplotlib.pyplot as plt\nimport tkinter as tk\nfrom tkinter import filedialog, messagebox\nimport os\n\n# \u4f7f\u7528Windows\u7cfb\u7edf\u4e0a\u901a\u7528\u7684\u4e2d\u6587\u5b57\u4f53\nplt.rcParams&#91;\"font.family\"] = &#91;\"SimHei\", \"Microsoft YaHei\", \"SimSun\", \"Arial\"]\n# \u8bbe\u7f6e\u5b57\u4f53\u67e5\u627e\u7684\u56de\u9000\u673a\u5236\nplt.rcParams&#91;\"axes.unicode_minus\"] = False  # \u89e3\u51b3\u8d1f\u53f7\u663e\u793a\u95ee\u9898\nmatplotlib.use('Agg')  # \u4f7f\u7528\u975e\u4ea4\u4e92\u5f0f\u540e\u7aef\n\nclass ApacheLogAnalyzer:\n    def __init__(self, log_file_path):\n        self.log_file_path = log_file_path\n        # \u9488\u5bf9\u7528\u6237\u63d0\u4f9b\u7684\u65e5\u5fd7\u683c\u5f0f\u4f18\u5316\u7684\u6b63\u5219\u8868\u8fbe\u5f0f\n        # \u7279\u522b\u5904\u7406\u4e86\u72b6\u6001\u7801\u540e\u9762\u53ef\u80fd\u662f'-'\u7684\u60c5\u51b5\n        self.log_pattern = r'^(\\S+) - - \\&#91;(.*?)\\] \"(.*?)\" (\\d+) (\\S+)'\n        self.log_entries = &#91;]\n        \n    def parse_log_file(self):\n        \"\"\"\u89e3\u6790\u65e5\u5fd7\u6587\u4ef6\u5e76\u63d0\u53d6\u5173\u952e\u4fe1\u606f\"\"\"\n        try:\n            print(f\"\u5f00\u59cb\u89e3\u6790\u65e5\u5fd7\u6587\u4ef6: {self.log_file_path}\")\n            print(f\"\u6587\u4ef6\u5927\u5c0f: {os.path.getsize(self.log_file_path)} \u5b57\u8282\")\n            \n            with open(self.log_file_path, 'r', encoding='utf-8', errors='replace') as file:\n                lines_processed = 0\n                for line in file:\n                    lines_processed += 1\n                    if lines_processed &lt;= 5:  # \u663e\u793a\u524d5\u884c\u7528\u4e8e\u8c03\u8bd5\n                        print(f\"\u793a\u4f8b\u884c {lines_processed}: {line.strip()}\")\n                    \n                    match = re.match(self.log_pattern, line)\n                    if match:\n                        ip = match.group(1)\n                        timestamp_str = match.group(2)\n                        request = match.group(3)\n                        status_code = int(match.group(4))\n                        \n                        # \u5904\u7406\u54cd\u5e94\u5927\u5c0f\u5b57\u6bb5\uff0c\u53ef\u80fd\u662f'-'\n                        size_str = match.group(5)\n                        size = int(size_str) if size_str != '-' else 0\n                        \n                        try:\n                            # \u89e3\u6790\u65f6\u95f4\u6233\uff0c\u9002\u914d\u7528\u6237\u65e5\u5fd7\u683c\u5f0f\n                            timestamp = datetime.strptime(timestamp_str.split()&#91;0], '%d\/%b\/%Y:%H:%M:%S')\n                        except ValueError as e:\n                            print(f\"\u8b66\u544a: \u65e0\u6cd5\u89e3\u6790\u65f6\u95f4\u6233 '{timestamp_str}'\uff0c\u9519\u8bef: {e}\")\n                            # \u4f7f\u7528\u5f53\u524d\u65f6\u95f4\u4f5c\u4e3a\u5907\u7528\uff0c\u786e\u4fdd\u81f3\u5c11\u6709\u6570\u636e\u7528\u4e8e\u56fe\u8868\n                            timestamp = datetime.now()\n                        \n                        # \u5c1d\u8bd5\u4ece\u8bf7\u6c42\u4e2d\u63d0\u53d6\u8bf7\u6c42\u65b9\u6cd5\u548c\u8def\u5f84\n                        request_parts = request.split()\n                        request_method = request_parts&#91;0] if len(request_parts) > 0 else \"Unknown\"\n                        request_path = request_parts&#91;1] if len(request_parts) > 1 else \"Unknown\"\n                        \n                        entry = {\n                            'ip': ip,\n                            'timestamp': timestamp,\n                            'request': request,\n                            'request_method': request_method,\n                            'request_path': request_path,\n                            'status_code': status_code,\n                            'size': size\n                        }\n                        self.log_entries.append(entry)\n            \n            print(f\"\u5171\u5904\u7406 {lines_processed} \u884c\uff0c\u6210\u529f\u89e3\u6790 {len(self.log_entries)} \u6761\u8bb0\u5f55\")\n            \n            if len(self.log_entries) == 0:\n                print(\"\u8b66\u544a: \u6ca1\u6709\u89e3\u6790\u5230\u4efb\u4f55\u65e5\u5fd7\u8bb0\u5f55\uff0c\u8bf7\u68c0\u67e5\u65e5\u5fd7\u683c\u5f0f\u662f\u5426\u5339\u914d\")\n                messagebox.showwarning(\"\u8b66\u544a\", \"\u6ca1\u6709\u89e3\u6790\u5230\u4efb\u4f55\u65e5\u5fd7\u8bb0\u5f55\uff0c\u8bf7\u68c0\u67e5\u65e5\u5fd7\u683c\u5f0f\u662f\u5426\u5339\u914d\")\n                return False\n            \n            return True\n        except Exception as e:\n            print(f\"\u89e3\u6790\u65e5\u5fd7\u6587\u4ef6\u65f6\u51fa\u9519: {e}\")\n            messagebox.showerror(\"\u9519\u8bef\", f\"\u89e3\u6790\u65e5\u5fd7\u6587\u4ef6\u65f6\u51fa\u9519: {e}\")\n            return False\n    \n    def analyze_top_ips(self, limit=10):\n        \"\"\"\u5206\u6790\u8bbf\u95ee\u91cf\u6700\u9ad8\u7684IP\u5730\u5740\"\"\"\n        if not self.log_entries:\n            print(\"\u6ca1\u6709\u6570\u636e\u53ef\u4f9b\u5206\u6790\")\n            return Counter()\n            \n        ip_counter = Counter(entry&#91;'ip'] for entry in self.log_entries)\n        print(f\"\\n\u8bbf\u95ee\u91cf\u6700\u9ad8\u7684{limit}\u4e2aIP\u5730\u5740:\")\n        for ip, count in ip_counter.most_common(limit):\n            print(f\"{ip}: {count} \u6b21\u8bbf\u95ee\")\n        return ip_counter\n    \n    def analyze_status_codes(self):\n        \"\"\"\u5206\u6790HTTP\u72b6\u6001\u7801\u5206\u5e03\"\"\"\n        if not self.log_entries:\n            print(\"\u6ca1\u6709\u6570\u636e\u53ef\u4f9b\u5206\u6790\")\n            return Counter()\n            \n        status_counter = Counter(entry&#91;'status_code'] for entry in self.log_entries)\n        print(f\"\\nHTTP\u72b6\u6001\u7801\u5206\u5e03:\")\n        total = sum(status_counter.values())\n        for status, count in sorted(status_counter.items()):\n            percentage = (count \/ total) * 100\n            print(f\"{status}: {count} \u6b21 ({percentage:.2f}%)\")\n        return status_counter\n    \n    def analyze_requests(self, limit=10):\n        \"\"\"\u5206\u6790\u6700\u5e38\u89c1\u7684\u8bf7\u6c42\"\"\"\n        if not self.log_entries:\n            print(\"\u6ca1\u6709\u6570\u636e\u53ef\u4f9b\u5206\u6790\")\n            return Counter()\n            \n        request_counter = Counter(entry&#91;'request_path'] for entry in self.log_entries)\n        \n        print(f\"\\n\u6700\u5e38\u89c1\u7684{limit}\u4e2a\u8bf7\u6c42\u8def\u5f84:\")\n        for path, count in request_counter.most_common(limit):\n            print(f\"{path}: {count} \u6b21\")\n        return request_counter\n    \n    def analyze_traffic_by_hour(self):\n        \"\"\"\u5206\u6790\u6bcf\u5c0f\u65f6\u7684\u8bbf\u95ee\u6d41\u91cf\"\"\"\n        if not self.log_entries:\n            print(\"\u6ca1\u6709\u6570\u636e\u53ef\u4f9b\u5206\u6790\")\n            return defaultdict(int)\n            \n        hourly_traffic = defaultdict(int)\n        for entry in self.log_entries:\n            hour_key = entry&#91;'timestamp'].strftime('%Y-%m-%d %H:00')\n            hourly_traffic&#91;hour_key] += 1\n        \n        print(f\"\\n\u6bcf\u5c0f\u65f6\u8bbf\u95ee\u91cf:\")\n        sorted_hours = sorted(hourly_traffic.items())\n        for hour, count in sorted_hours&#91;:10]:  # \u53ea\u663e\u793a\u524d10\u4e2a\u5c0f\u65f6\u7684\u6570\u636e\n            print(f\"{hour}: {count} \u6b21\")\n        if len(sorted_hours) > 10:\n            print(f\"... \u8fd8\u6709 {len(sorted_hours) - 10} \u4e2a\u5c0f\u65f6\u7684\u6570\u636e\u672a\u663e\u793a\")\n        \n        return hourly_traffic\n    \n    def analyze_request_methods(self):\n        \"\"\"\u5206\u6790\u8bf7\u6c42\u65b9\u6cd5\u5206\u5e03\uff08GET, POST\u7b49\uff09\"\"\"\n        if not self.log_entries:\n            print(\"\u6ca1\u6709\u6570\u636e\u53ef\u4f9b\u5206\u6790\")\n            return Counter()\n            \n        method_counter = Counter(entry&#91;'request_method'] for entry in self.log_entries)\n        \n        print(f\"\\n\u8bf7\u6c42\u65b9\u6cd5\u5206\u5e03:\")\n        total = sum(method_counter.values())\n        for method, count in sorted(method_counter.items()):\n            percentage = (count \/ total) * 100\n            print(f\"{method}: {count} \u6b21 ({percentage:.2f}%)\")\n        return method_counter\n    \n    def generate_hourly_traffic_chart(self, hourly_traffic):\n        \"\"\"\u53ef\u89c6\u5316\u6bcf\u5c0f\u65f6\u7684\u8bbf\u95ee\u6d41\u91cf\"\"\"\n        if not hourly_traffic:\n            print(\"\u6ca1\u6709\u6570\u636e\u53ef\u751f\u6210\u56fe\u8868\")\n            messagebox.showwarning(\"\u8b66\u544a\", \"\u6ca1\u6709\u6570\u636e\u53ef\u751f\u6210\u56fe\u8868\")\n            return\n            \n        try:\n            hours = &#91;item&#91;0] for item in sorted(hourly_traffic.items())]\n            counts = &#91;item&#91;1] for item in sorted(hourly_traffic.items())]\n            \n            # \u786e\u4fdd\u6709\u8db3\u591f\u7684\u6570\u636e\u70b9\n            if len(hours) &lt; 2:\n                print(\"\u6570\u636e\u70b9\u592a\u5c11\uff0c\u65e0\u6cd5\u751f\u6210\u6709\u610f\u4e49\u7684\u56fe\u8868\")\n                messagebox.showwarning(\"\u8b66\u544a\", \"\u6570\u636e\u70b9\u592a\u5c11\uff0c\u65e0\u6cd5\u751f\u6210\u6709\u610f\u4e49\u7684\u56fe\u8868\")\n                # \u521b\u5efa\u4e00\u4e2a\u7b80\u5355\u7684\u793a\u4f8b\u56fe\u8868\uff0c\u907f\u514d\u7528\u6237\u770b\u5230\u7a7a\u767d\u56fe\u8868\n                plt.figure(figsize=(12, 6))\n                plt.plot(&#91;\"\u793a\u4f8b\u65f6\u95f41\", \"\u793a\u4f8b\u65f6\u95f42\"], &#91;10, 15], marker='o')\n                plt.title('\u793a\u4f8b\u6d41\u91cf\u8d8b\u52bf\uff08\u5b9e\u9645\u6570\u636e\u70b9\u4e0d\u8db3\uff09')\n                plt.xlabel('\u65f6\u95f4')\n                plt.ylabel('\u8bbf\u95ee\u6b21\u6570')\n                plt.tight_layout()\n                plt.savefig('hourly_traffic.png')\n                plt.show()\n                messagebox.showinfo(\"\u6210\u529f\", \"\u7531\u4e8e\u5b9e\u9645\u6570\u636e\u70b9\u4e0d\u8db3\uff0c\u5df2\u751f\u6210\u793a\u4f8b\u6d41\u91cf\u8d8b\u52bf\u56fe: hourly_traffic.png\")\n                return\n            \n            plt.figure(figsize=(12, 6))\n            plt.plot(hours, counts, marker='o')\n            plt.title('\u6bcf\u5c0f\u65f6\u8bbf\u95ee\u6d41\u91cf')\n            plt.xlabel('\u65f6\u95f4')\n            plt.ylabel('\u8bbf\u95ee\u6b21\u6570')\n            \n            # \u81ea\u52a8\u8c03\u6574x\u8f74\u6807\u7b7e\uff0c\u907f\u514d\u8fc7\u4e8e\u62e5\u6324\n            if len(hours) > 12:\n                step = max(1, len(hours) \/\/ 12)\n                plt.xticks(hours&#91;::step], rotation=45)\n            else:\n                plt.xticks(hours, rotation=45)\n                \n            plt.tight_layout()\n            \n            # \u4fdd\u5b58\u56fe\u8868\n            plt.savefig('hourly_traffic.png', dpi=300, bbox_inches='tight')\n            # \u79fb\u9664\u4e0b\u9762\u8fd9\u884c\u4ee3\u7801\uff0c\u56e0\u4e3aAgg\u540e\u7aef\u4e0d\u652f\u6301\u4ea4\u4e92\u5f0f\u663e\u793a\n            # plt.show()\n            \n            messagebox.showinfo(\"\u6210\u529f\", \"\u6d41\u91cf\u8d8b\u52bf\u56fe\u5df2\u751f\u6210\u5e76\u663e\u793a: hourly_traffic.png\")\n        except Exception as e:\n            print(f\"\u751f\u6210\u56fe\u8868\u65f6\u51fa\u9519: {e}\")\n            messagebox.showerror(\"\u9519\u8bef\", f\"\u751f\u6210\u56fe\u8868\u65f6\u51fa\u9519: {e}\")\n\n    def run_full_analysis(self):\n        \"\"\"\u8fd0\u884c\u5b8c\u6574\u7684\u65e5\u5fd7\u5206\u6790\"\"\"\n        if not self.parse_log_file():\n            print(\"\u89e3\u6790\u5931\u8d25\uff0c\u65e0\u6cd5\u7ee7\u7eed\u5206\u6790\")\n            return\n        \n        print(\"\\n===== Apache\u65e5\u5fd7\u5206\u6790\u62a5\u544a =====\")\n        ip_counter = self.analyze_top_ips()\n        status_counter = self.analyze_status_codes()\n        request_counter = self.analyze_requests()\n        method_counter = self.analyze_request_methods()  # \u65b0\u589e\u7684\u8bf7\u6c42\u65b9\u6cd5\u5206\u6790\n        hourly_traffic = self.analyze_traffic_by_hour()\n        \n        # \u751f\u6210\u53ef\u89c6\u5316\u56fe\u8868\n        # \u786e\u4fdd\u5c06hourly_traffic\u4f5c\u4e3a\u53c2\u6570\u4f20\u9012\n        self.generate_hourly_traffic_chart(hourly_traffic)\n        \n        print(\"\\n===== \u5206\u6790\u5b8c\u6210 =====\")\n        messagebox.showinfo(\"\u5b8c\u6210\", \"\u65e5\u5fd7\u5206\u6790\u5df2\u5b8c\u6210\uff01\")\n\ndef select_log_file():\n    \"\"\"\u5f39\u51fa\u6587\u4ef6\u9009\u62e9\u5bf9\u8bdd\u6846\uff0c\u8ba9\u7528\u6237\u9009\u62e9\u65e5\u5fd7\u6587\u4ef6\"\"\"\n    # \u521b\u5efa\u4e00\u4e2a\u9690\u85cf\u7684Tk\u6839\u7a97\u53e3\n    root = tk.Tk()\n    root.withdraw()  # \u9690\u85cf\u4e3b\u7a97\u53e3\n    \n    # \u8bbe\u7f6e\u4e2d\u6587\u5b57\u4f53\u652f\u6301\n    root.option_add(\"*Font\", \"SimHei 10\")\n    \n    # \u5f39\u51fa\u6587\u4ef6\u9009\u62e9\u5bf9\u8bdd\u6846\n    file_path = filedialog.askopenfilename(\n        title=\"\u9009\u62e9Apache\u65e5\u5fd7\u6587\u4ef6\",\n        filetypes=&#91;\n            (\"\u65e5\u5fd7\u6587\u4ef6\", \"*.log\"),\n            (\"\u6587\u672c\u6587\u4ef6\", \"*.txt\"),\n            (\"\u6240\u6709\u6587\u4ef6\", \"*.*\")\n        ]\n    )\n    \n    return file_path\n\nif __name__ == \"__main__\":\n    # \u521b\u5efa\u547d\u4ee4\u884c\u53c2\u6570\u89e3\u6790\u5668\n    parser = argparse.ArgumentParser(description='Apache\u65e5\u5fd7\u5206\u6790\u5de5\u5177')\n    parser.add_argument('--log_file', help='Apache\u65e5\u5fd7\u6587\u4ef6\u8def\u5f84\uff08\u53ef\u9009\uff0c\u4e0d\u63d0\u4f9b\u5219\u5f39\u51fa\u6587\u4ef6\u9009\u62e9\u5bf9\u8bdd\u6846\uff09')\n    args = parser.parse_args()\n    \n    log_file_path = args.log_file\n    \n    # \u5982\u679c\u6ca1\u6709\u63d0\u4f9b\u65e5\u5fd7\u6587\u4ef6\u8def\u5f84\uff0c\u5f39\u51fa\u6587\u4ef6\u9009\u62e9\u5bf9\u8bdd\u6846\n    if not log_file_path:\n        log_file_path = select_log_file()\n        \n        # \u68c0\u67e5\u7528\u6237\u662f\u5426\u53d6\u6d88\u4e86\u6587\u4ef6\u9009\u62e9\n        if not log_file_path:\n            print(\"\u672a\u9009\u62e9\u65e5\u5fd7\u6587\u4ef6\uff0c\u7a0b\u5e8f\u9000\u51fa\u3002\")\n            exit(0)\n    \n    # \u68c0\u67e5\u6587\u4ef6\u662f\u5426\u5b58\u5728\n    if not os.path.exists(log_file_path):\n        print(f\"\u9519\u8bef\uff1a\u627e\u4e0d\u5230\u6587\u4ef6 '{log_file_path}'\")\n        messagebox.showerror(\"\u9519\u8bef\", f\"\u627e\u4e0d\u5230\u6587\u4ef6 '{log_file_path}'\")\n        exit(1)\n    \n    # \u521b\u5efa\u5206\u6790\u5668\u5b9e\u4f8b\u5e76\u8fd0\u884c\u5206\u6790\n    analyzer = ApacheLogAnalyzer(log_file_path)\n    analyzer.run_full_analysis()<\/code><\/pre>\n\n\n\n<p><\/p>\n","protected":false},"excerpt":{"rendered":"","protected":false},"author":1,"featured_media":0,"comment_status":"closed","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[2],"tags":[],"class_list":["post-6409","post","type-post","status-publish","format-standard","hentry","category-2"],"_links":{"self":[{"href":"http:\/\/g1n29wqq.ipyingshe.net:5347\/index.php?rest_route=\/wp\/v2\/posts\/6409","targetHints":{"allow":["GET"]}}],"collection":[{"href":"http:\/\/g1n29wqq.ipyingshe.net:5347\/index.php?rest_route=\/wp\/v2\/posts"}],"about":[{"href":"http:\/\/g1n29wqq.ipyingshe.net:5347\/index.php?rest_route=\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"http:\/\/g1n29wqq.ipyingshe.net:5347\/index.php?rest_route=\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"http:\/\/g1n29wqq.ipyingshe.net:5347\/index.php?rest_route=%2Fwp%2Fv2%2Fcomments&post=6409"}],"version-history":[{"count":2,"href":"http:\/\/g1n29wqq.ipyingshe.net:5347\/index.php?rest_route=\/wp\/v2\/posts\/6409\/revisions"}],"predecessor-version":[{"id":6412,"href":"http:\/\/g1n29wqq.ipyingshe.net:5347\/index.php?rest_route=\/wp\/v2\/posts\/6409\/revisions\/6412"}],"wp:attachment":[{"href":"http:\/\/g1n29wqq.ipyingshe.net:5347\/index.php?rest_route=%2Fwp%2Fv2%2Fmedia&parent=6409"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"http:\/\/g1n29wqq.ipyingshe.net:5347\/index.php?rest_route=%2Fwp%2Fv2%2Fcategories&post=6409"},{"taxonomy":"post_tag","embeddable":true,"href":"http:\/\/g1n29wqq.ipyingshe.net:5347\/index.php?rest_route=%2Fwp%2Fv2%2Ftags&post=6409"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}