Python排序小案例集

Python 排序 sorted & sort

1、Example1：给这些日期文本排序。

import re
import time

import numpy as np

# Month/day labels in arbitrary order; none of them carries a year.
a = [
    '2月9日', '2月8日', '2月7日', '2月6日', '2月5日', '2月4日', '2月3日',
    '2月2日', '2月1日', '2月12日', '2月11日', '2月10日', '1月31日', '1月30日',
    '1月29日', '1月28日', '1月27日', '1月26日', '1月25日', '1月24日', '1月23日',
]

# Method 1: pull the month and day numbers out with regular expressions and
# sort on the (month, day) integer pair; the 21 sorted labels are then laid
# out as a 7x3 grid for display.
b = np.reshape(
    sorted(
        a,
        key=lambda x: (
            int(re.search(r'(\d+)月', x).group(1)),
            int(re.search(r'(\d+)日', x).group(1)),
        ),
    ),
    (7, 3),
)

# Method 2: prepend a fixed year (2020) so the label can be parsed as a full
# date, then sort on the resulting timestamp.
c = np.reshape(
    sorted(
        a,
        key=lambda x: time.mktime(time.strptime('%d年' % 2020 + x, '%Y年%m月%d日')),
    ),
    (7, 3),
)

print(b, c, sep='\n')

执行结果：
[['1月23日' '1月24日' '1月25日']
 ['1月26日' '1月27日' '1月28日']
 ['1月29日' '1月30日' '1月31日']
 ['2月1日' '2月2日' '2月3日']
 ['2月4日' '2月5日' '2月6日']
 ['2月7日' '2月8日' '2月9日']
 ['2月10日' '2月11日' '2月12日']]

[['1月23日' '1月24日' '1月25日']
 ['1月26日' '1月27日' '1月28日']
 ['1月29日' '1月30日' '1月31日']
 ['2月1日' '2月2日' '2月3日']
 ['2月4日' '2月5日' '2月6日']
 ['2月7日' '2月8日' '2月9日']
 ['2月10日' '2月11日' '2月12日']]

Process finished with exit code 0

2、Example2：根据数组b里的顺序排列a二维数组。

a = [
    ["name", 4],
    ["fuck", 2],
    ["duck", 8],
    ["ook", 9],
    ["love", 3],
    ["abc", 4],
]
b = ["fuck", "ook", "love", "abc", "duck", "name"]

# Reorder the rows of `a` so that their names follow the order given in `b`:
# for each wanted name, collect (in their original order) the rows carrying it.
_reordered = []
for _wanted in b:
    _reordered.extend(_row for _row in a if _row[0] == _wanted)
a = _reordered

# The numeric column of the reordered rows.
c = [_row[1] for _row in a]
print(a, c, sep='\n')

执行结果：
[['fuck', 2], ['ook', 9], ['love', 3], ['abc', 4], ['duck', 8], ['name', 4]]
[2, 9, 3, 4, 8, 4]

3、Example3：面试题：根据数字大小排列 [{"a": 3}, {"b": 1}, {"c": 5}]

lst = [{"a": 3}, {"b": 1}, {"c": 5}]


def _only_value(d):
    """Return the first (here: only) value stored in dict ``d``."""
    return list(d.values())[0]


# In-place ascending sort on each dict's value.
lst.sort(key=_only_value)
print(lst)

# It turns out the [0] is not needed: the key may be the whole list of values,
# because lists compare element by element.
print(sorted(lst, reverse=True, key=lambda d: list(d.values())))

# Fancy trick: *d unpacks the dict's single key, so d.get(*d) is d.get(key),
# i.e. the value itself.
print(sorted(lst, key=lambda d: d.get(*d)))

执行结果：
[{'b': 1}, {'a': 3}, {'c': 5}]
[{'c': 5}, {'a': 3}, {'b': 1}]
[{'b': 1}, {'a': 3}, {'c': 5}]

4、用numpy凑一个列表来排序：

import numpy as np
# Ten rows: two columns of random ints drawn from [1, 10) followed by one
# column of random ints drawn from [30, 2600), joined side by side.
_small_columns = np.random.randint(1, 10, [10, 2])
_large_column = np.random.randint(30, 2600, [10, 1])
a = np.concatenate((_small_columns, _large_column), axis=1)
print(a)

执行结果：
[[   3    8  383]
 [   1    7  205]
 [   3    4  723]
 [   3    2  329]
 [   8    4  369]
 [   6    2  773]
 [   5    1 1520]
 [   4    8 1352]
 [   3    2  373]
 [   1    3 1013]]

Process finished with exit code 0

5、对二维数组进行排序：第二列按升序排序，第三列按降序排序。

import numpy as np

# Sample table: ten rows of three integers each.
a = np.array([
    (4, 2, 2562),
    (5, 7, 1028),
    (1, 6, 2007),
    (1, 9, 679),
    (3, 1, 1898),
    (5, 2, 2106),
    (8, 6, 1172),
    (4, 7, 242),
    (4, 3, 2127),
    (9, 7, 1981),
])


def _second_asc_third_desc(row):
    """Sort key: second column ascending, then third column descending."""
    # Negating a numeric value reverses its order inside an ascending sort.
    return row[1], -row[2]


# sorted() walks the 2-D array row by row and returns a list of row arrays;
# np.array() stacks those rows back into a 10x3 array.
b = np.array(sorted(a, key=_second_asc_third_desc))
print(b)

执行结果：
[[   3    1 1898]
 [   4    2 2562]
 [   5    2 2106]
 [   4    3 2127]
 [   1    6 2007]
 [   8    6 1172]
 [   9    7 1981]
 [   5    7 1028]
 [   4    7  242]
 [   1    9  679]]

Process finished with exit code 0

封装成一个自定义函数：

import numpy as np

# Demo table passed to two_d_list_sort below: ten rows, three integer columns.
a = np.array([
    (4, 2, 2562),
    (5, 7, 1028),
    (1, 6, 2007),
    (1, 9, 679),
    (3, 1, 1898),
    (5, 2, 2106),
    (8, 6, 1172),
    (4, 7, 242),
    (4, 3, 2127),
    (9, 7, 1981),
])

def two_d_list_sort(pl=list, il=list):
    """Sort the rows of a 2-D sequence by several columns at once.

    Args:
        pl: Iterable of indexable rows (for example a list of lists or a
            2-D numpy array).
        il: Iterable of integer column specifiers, highest priority first.
            A value ``n >= 0`` sorts column ``n`` ascending; a value
            ``n < 0`` sorts column ``abs(n)`` descending by negating its
            values, so that column must hold numbers.  Column 0 can only be
            sorted ascending because ``-0 == 0``.

    Returns:
        A new list holding the rows of ``pl`` in sorted order.  With an
        empty ``il`` the rows keep their original order.

    Note:
        The parameter defaults are the ``list`` type itself and are kept only
        for signature compatibility; both arguments must always be passed.
    """
    # Resolve each specifier once into (column, descending) rather than
    # assembling a source string and eval()-ing it again for every row.
    columns = [(abs(spec), spec < 0) for spec in il]

    def row_key(row):
        return tuple(
            -row[col] if descending else row[col]
            for col, descending in columns
        )

    return sorted(pl, key=row_key)

# Column 1 descending, then column 2 ascending; shown as a 10x3 array.
_ranked_rows = two_d_list_sort(a, [-1, 2])
print(np.reshape(_ranked_rows, (10, 3)))

执行结果：
[[   1    9  679]
 [   4    7  242]
 [   5    7 1028]
 [   9    7 1981]
 [   8    6 1172]
 [   1    6 2007]
 [   4    3 2127]
 [   5    2 2106]
 [   4    2 2562]
 [   3    1 1898]]

按字符串出现次数排序

原文地址：按字符串出现次数排序

给定一个字符串，将字符串里的字符按照出现的次数降序排列，并返回排列后的字符串。

示例 1：

输入："tree"
输出："eert"
解释：'e' 出现两次，'r' 和 't' 都只出现一次。因此 'e' 必须出现在 'r' 和 't' 之前。
说明：此外，"eetr" 也是正确的答案。

示例 2：

输入："cccaaa"
输出："cccaaa"
解释：'c' 和 'a' 都出现三次。
说明：此外，"aaaccc" 也是有效的答案。
注意："cacaca"是不正确的，因为相同的字母必须放在一起。

示例 3：

输入："Aabb"
输出："bbAa"
说明：此外，"bbaA" 也是正确的答案，但 "Aabb" 是不正确的。

方法1：

from collections import defaultdict

def sort_text(origin):
    """Print the characters of ``origin`` grouped and ordered by frequency.

    The most frequent character comes first.  Characters with equal counts
    keep the order in which they first appear in ``origin``, because the
    counts are stored in insertion order and the sort is stable.  Nothing is
    returned.
    """
    counts = defaultdict(int)
    for ch in origin:
        counts[ch] += 1
    ranked = sorted(counts.items(), key=lambda pair: pair[1], reverse=True)
    print(''.join(ch * times for ch, times in ranked))

# Feed the three sample inputs from the problem statement to sort_text.
for i in ('tree', 'cccaaa', 'Aabb'):
    sort_text(i)

方法二（调用 collections.Counter，比 defaultdict 更直接）：

import collections

class Solution:
    """Frequency sort implemented with two stable sorts and a Counter."""

    def frequencySort(self, s: str) -> str:
        """Print and return ``s`` with characters ordered by frequency.

        Args:
            s: The text whose characters are to be regrouped.

        Returns:
            The characters of ``s`` joined into one string, most frequent
            character first; characters with equal counts appear in
            ascending character order.
        """
        # Sorting by character first makes equal characters adjacent; the
        # stable frequency sort then keeps them adjacent when counts tie.
        chars = sorted(s)
        chars.sort(key=collections.Counter(chars).get, reverse=True)
        result = ''.join(chars)
        # The print is kept so existing callers that rely on the output still
        # see it; the value is now also returned, as the annotation promises.
        print(result)
        return result

# Run the three sample inputs through one Solution instance.
a = Solution()
for i in ('tree', 'cccaaa', 'Aabb'):
    a.frequencySort(i)

方法三（不调用任何轮子）：

def get2(text):
    """Print the characters of ``text`` grouped by character, most frequent first.

    Only built-ins are used: each distinct character is counted with
    ``str.count``.  The order among characters with equal counts follows the
    iteration order of a set and is therefore not fixed.  Nothing is returned.
    """
    count_dict = {ch: text.count(ch) for ch in set(text)}

    # dict.items() already yields (char, count) pairs, so zipping the keys
    # with the values first is unnecessary.
    ascending = sorted(count_dict.items(), key=lambda pair: pair[1])

    # Walk the ascending ranking backwards to emit the most frequent first.
    pieces = [ch * times for ch, times in reversed(ascending)]
    print(''.join(pieces))

# Feed the three sample inputs from the problem statement to get2.
for i in ('tree', 'cccaaa', 'Aabb'):
    get2(i)

方法四（同样用 str 对象的 count 方法来排序，无需用轮子）：
注意：排序的 key 要写成 lambda x: (string.count(x), x)，不要写成 lambda x: string.count(x)；否则出现次数相同的不同字符会保持原有顺序而交错在一起（例如 "abab" 排序后仍是 "abab"），不满足“相同的字母必须放在一起”的要求。

def func(string):
    """Return ``string`` with its characters ordered by descending frequency.

    Ties in frequency are broken by the character itself (also descending),
    which keeps identical characters next to each other.
    """
    def rank(ch):
        # (occurrences, character): the second element separates different
        # characters that happen to share the same count.
        return string.count(ch), ch

    return ''.join(sorted(string, key=rank, reverse=True))

# func returns its result instead of printing it, so print it here; otherwise
# this demo loop produces no output at all.
for i in ['tree', 'cccaaa', 'Aabb']:
    print(func(i))

方法五（仍然是非常直接的 Counter 方法，直接调用 most_common）：

from collections import Counter

def daily387(s):
    """Print the characters of ``s`` grouped and ordered by descending frequency.

    The ordering comes straight from ``Counter.most_common()``.  Nothing is
    returned.
    """
    pieces = []
    for char, times in Counter(s).most_common():
        pieces.append(char * times)
    print(''.join(pieces))

# Feed the three sample inputs from the problem statement to daily387.
for i in ('tree', 'cccaaa', 'Aabb'):
    daily387(i)

应用：对约600万个字符的txt文件按字符出现次数排序

import random
import string
from time import time
from collections import Counter
import base64

def picsTostr(image=''):
    """Base64-encode the file at ``image``, print the result's type, return it.

    Args:
        image: Path of the file to encode.  The default empty path cannot be
            opened, so a real path has to be supplied for the call to succeed.

    Returns:
        The Base64 encoding of the file's content, as ``bytes``.

    Raises:
        OSError: If ``image`` cannot be opened (``FileNotFoundError`` for the
            empty default path).
    """
    # Read the raw bytes and encode them; the local is not called `str` so
    # the builtin stays usable inside this function.
    with open(image, 'rb') as f:
        encoded = base64.b64encode(f.read())
    print(type(encoded))
    return encoded

def getMillionStr(f, chunks=100000, chunk_len=62):
    """Append random alphanumeric text to the file ``f``.

    Args:
        f: Path of the text file; it is opened in append mode, so existing
            content is kept.
        chunks: Number of random chunks to write.
        chunk_len: Number of characters per chunk.  The defaults write
            100000 * 62 = 6,200,000 characters.
    """
    alphabet = string.ascii_letters + string.digits

    def genRandomString(slen):
        # choices() draws with replacement.  sample() with slen equal to the
        # alphabet size only ever returns a permutation of the alphabet, so
        # every character would occur equally often (useless for a
        # frequency sort), and any slen above 62 would raise ValueError.
        return ''.join(random.choices(alphabet, k=slen))

    # Open the file once rather than once per chunk.
    with open(f, mode='a', encoding='ascii') as txt:
        for _ in range(chunks):
            txt.write(genRandomString(slen=chunk_len))

def sortNo3(s):
    """Return ``s`` with its characters grouped and ordered by descending frequency.

    The ordering is the one produced by ``Counter.most_common()``.
    """
    ranking = Counter(s).most_common()
    return ''.join([char * times for char, times in ranking])

if __name__ == '__main__':
    # Read the whole test file, report its length and the read time, then
    # write the frequency-sorted text and report the time taken by sorting
    # plus writing.
    folder = r'E:\PyProjts\Python_Prefereneces/'
    source_path = folder + '测试字符串600万.txt'
    target_path = folder + '排序好的.txt'

    start = time()
    with open(file=source_path, mode='r', encoding='ascii') as source:
        strAll = source.read()
    start2 = time()

    read_seconds = start2 - start
    print(f'字符串数量{len(strAll)}个')
    print(f'读取完成，用时{read_seconds}s')

    with open(file=target_path, mode='w', encoding='ascii') as target:
        target.write(sortNo3(strAll))
    # Measured from the end of the read, so it also includes the two prints.
    print(f'排序+写入，用时{time() - start2}s')

（此处原有一张图片，未能保留）