Python 操作 Solr 索引数据的实例详解

作者:shaomine 时间:2023-11-02 16:18:19 

测试代码1:


def test(self):
    """Post one hard-coded sample document to the Solr update handler.

    Sends a JSON ``add`` command to the ``mycore`` core on 127.0.0.1:8983
    and prints the raw response body.
    """
    endpoint = 'http://127.0.0.1:8983/solr/mycore/update?wt=json'
    # NOTE(review): "*字段名*" looks like a placeholder for a real schema
    # field name -- confirm before running against a real core.
    document = {"id": "100001", "*字段名*": u"我是一个大好人"}
    r = requests.post(
        endpoint,
        json={"add": {"doc": document}},
        params={"boost": 1.0, "overwrite": "true", "commitWithin": 1000},
        headers={"Content-Type": "application/json"},
    )
    # Single-argument print() behaves the same on Python 2 and Python 3.
    print(r.text)

def Index_data(self):
    """Add two sample documents to the ``mycore`` core via pysolr and print the result."""
    sample_docs = [
        {
            "id": "doc_1",
            "title": "A test document",
        },
        {
            "id": "doc_2",
            "title": "The Banana: Tasty or Dangerous?",
        },
    ]
    client = pysolr.Solr('http://127.0.0.1:8983/solr/mycore/', timeout=10)
    # Documents are indexed by handing a list of dicts to Solr.add().
    outcome = client.add(sample_docs)
    print(outcome)

测试代码2:

实际数据:

对python 操作solr索引数据的实例详解


def Index_Data_FromCSV(self, csvfile):
    """Index the rows of a CSV file into Solr, one HTTP update request per row.

    The first row is treated as the header and skipped. For every other row,
    columns 0, 3 and 4 are decoded from GB2312 and sent as ``title``,
    ``source`` and ``keyword``; column 1 is sent unchanged as ``link``
    (column 2, the date, is not indexed). A row that fails -- too few
    columns, a decode error, a request error -- is reported and skipped, and
    the remaining rows are still processed.

    :param csvfile: path of the CSV file, including the directory
    :return: None
    """
    # NOTE(review): CSVOP.ReadCSV appears to return None for a missing file;
    # treat that as "no rows" instead of failing on iteration.
    rows = CSVOP.ReadCSV(csvfile) or []
    params = {"boost": 1.0, "overwrite": "true", "commitWithin": 1000}
    url = 'http://127.0.0.1:8983/solr/mycore/update?wt=json'
    headers = {"Content-Type": "application/json"}
    for index, item in enumerate(rows):
        if index == 0:  # the first row is the header
            continue
        try:
            doc = {
                'title': item[0].decode('GB2312'),
                'link': item[1],
                'source': item[3].decode('GB2312'),
                'keyword': item[4].decode('GB2312'),
            }
            r = requests.post(url, json={"add": {"doc": doc}},
                              params=params, headers=headers)
            print(r.text)
        except Exception as e:
            # Best-effort per row. Print the exception itself: e.message is
            # empty for multi-argument errors such as UnicodeDecodeError.
            print(e)
        # Progress indicator: number of the row that was just handled.
        print(index)

# pysolr client version
def pysolr_Index_Data_FromCSV(self, csvfile, url='http://127.0.0.1:8983/solr/mycore/'):
    """Index the rows of a CSV file into Solr with a single pysolr ``add`` call.

    The first row is treated as the header and skipped. For every other row,
    columns 0, 3 and 4 are decoded from GB2312 and stored as ``title``,
    ``source`` and ``keyword``; column 1 is stored unchanged as ``link``
    (column 2, the date, is not indexed). Rows that cannot be converted are
    reported and left out of the batch.

    :param csvfile: path of the CSV file, including the directory
    :param url: base URL of the Solr core
    :return: None
    """
    # NOTE(review): CSVOP.ReadCSV appears to return None for a missing file;
    # treat that as "no rows" instead of failing on iteration.
    rows = CSVOP.ReadCSV(csvfile) or []
    listdocs = []
    for index, item in enumerate(rows):
        if index == 0:  # the first row is the header
            continue
        try:
            listdocs.append({
                'title': item[0].decode('GB2312'),
                'link': item[1],
                'source': item[3].decode('GB2312'),
                'keyword': item[4].decode('GB2312'),
            })
        except Exception as e:
            # Best-effort per row. Print the exception itself: e.message is
            # empty for multi-argument errors such as UnicodeDecodeError.
            print(e)
    solr = pysolr.Solr(url, timeout=10)
    result = solr.add(listdocs)
    print(result)

查询代码:


def search_data(self, message='视频'):
    """Query the ``title`` field over HTTP and print the response and hit count.

    Prints the raw JSON response, then ``<message>:<numFound>``.

    :param message: term to search for in ``title``
    :return: None
    """
    params = {
        # Same query text as before (quoted phrase with a leading backslash),
        # but the backslash is now spelled as a valid escape and the value is
        # URL-encoded by requests, so terms containing '&', '#' or spaces no
        # longer corrupt the request.
        'q': 'title:"\\%s"' % message,
        'wt': 'json',
        'indent': 'true',
    }
    # verify=False only matters for HTTPS endpoints; kept from the original.
    r = requests.get('http://127.0.0.1:8983/solr/mycore/select',
                     params=params, verify=False)
    print(r.text)
    found = r.json()['response']['numFound']
    print(message + ":" + str(found))

# pysolr client version
def search_data(self, where='视频', url='http://127.0.0.1:8983/solr/mycore/'):
    """Search the ``title`` field through pysolr and print every returned hit.

    :param where: term to look for in ``title`` (previously ignored in favour
        of a hard-coded query)
    :param url: base URL of the Solr core
    :return: None
    """
    solr = pysolr.Solr(url, timeout=10)
    # start=10 skips the first ten hits; at most 30 documents are requested,
    # restricted to the four fields printed below.
    options = {'start': 10, 'rows': 30, 'fl': 'title,keyword,source,link'}
    result = solr.search('title:%s' % where, **options)
    # The total hit count is available as
    # result.raw_response['response']['numFound'].
    for item in result:
        print('keyword: %s' % item['keyword'])
        print('title: %s' % item['title'])
        print('source: %s' % item['source'])
        print('link: %s' % item['link'])
        print('\n\n')

输出结果:


{
"responseHeader":{
"status":0,
"QTime":0,
"params":{
 "q":"title:\"\\视频\"",
 "indent":"true",
 "wt":"json"}},
"response":{"numFound":123,"start":0,"docs":[
 {
 "source":"中彩网",
 "link":"http://www.zhcw.com/video/kaijiangshipin-3D/11981126.shtml",
 "keyword":"视频",
 "title":"福彩3D开奖 视频 -中彩 视频",
 "id":"2f0a9d21-3771-4efa-a0cc-e0484cc97993",
 "_version_":1584214368617234432},
 {
 "source":"新浪视频",
 "link":"http://video.sina.com.cn/news/spj/topvideoes20170707/?opsubject_id=top1",
 "keyword":"视频",
 "title":"今日热门 视频 汇总20170707",
 "id":"c8aae0af-01e9-491f-b999-24b97004a4ba",
 "_version_":1584214367507841024},
 {
 "source":"网易新闻",
 "link":"http://news.163.com/17/0707/13/COOCNUIE00018AOR.html",
 "keyword":"视频",
 "title":"网传\"兰桂坊附近不雅 视频 \" 警方:传播 视频 将追责",
 "id":"353de48d-ede7-481b-89d3-bc20ab4b3884",
 "_version_":1584214367821365248},
 {
 "source":"凤凰视频",
 "link":"http://v.ifeng.com/video_7480871.shtml",
 "keyword":"视频",
 "title":"创想动画片:花粉过敏症的痛谁懂-凤凰 视频 -最具媒体品质的综合 视频 ...",
 "id":"dc5f19c4-180f-4004-a0db-4499d875a60f",
 "_version_":1584214366819975168},
 {
 "source":"凤凰视频",
 "link":"http://v.ifeng.com/video_7805858.shtml",
 "keyword":"视频",
 "title":"节气说:小暑时节就该这样养生-凤凰 视频 -最具媒体品质的综合 视频 门...",
 "id":"5e9eb7a7-48b8-4e41-9514-7712ae619d9a",
 "_version_":1584214367516229632},
 {
 "source":"凤凰视频",
 "link":"http://v.ifeng.com/video_7483506.shtml",
 "keyword":"视频",
 "title":"听导演讲《神奇女侠》的故事 -凤凰 视频 -最具媒体品质的综合 视频 门户-...",
 "id":"6b1482f1-c0c9-479f-bef7-7de324fb9372",
 "_version_":1584214367647301632},
 {
 "source":"汽车杂志",
 "link":"http://www.jiemian.com/article/1445267.html",
 "keyword":"视频",
 "title":"【视频】欧宝最近找了一堆穿睡衣的辣妈拍了一段超牛的视频",
 "id":"1d327555-a6f3-4513-9a21-43d59418ab82",
 "_version_":1584214368157958144},
 {
 "source":"味觉大师",
 "link":"http://www.jiemian.com/article/1453545.html",
 "keyword":"视频",
 "title":"【视频】大董没有肉的肉味烧茄子",
 "id":"7d777870-93cb-4c18-a32b-734af8f133f1",
 "_version_":1584213891451191296},
 {
 "source":"新浪汽车",
 "link":"http://auto.sina.com.cn/video/zz/2017-07-07/detail-ifyhwehx5311889.shtml",
 "keyword":"视频",
 "title":"视频 :两大神车pk!高尔夫思域怎么选?",
 "id":"3a50b303-6b54-4da3-aee1-a61c678c752d",
 "_version_":1584213892090822656},
 {
 "source":"味觉大师",
 "link":"http://www.jiemian.com/article/1453545.html",
 "keyword":"视频",
 "title":"【视频】大董没有肉的肉味烧茄子",
 "id":"01da8e11-77bc-4c31-ba3a-ba668e846d9d",
 "_version_":1584214366191878144}]
}}

完整代码:


#-*- coding: UTF-8 -*-
import csv
import os
import codecs

def ReadCSV(filename):
    """Read a CSV file and return all of its rows.

    :param filename: path of the CSV file
    :return: a list containing one list of column strings per CSV row, or
        ``None`` when ``filename`` does not exist
    """
    if not os.path.exists(filename):
        return None
    with open(filename, 'r') as f:
        return list(csv.reader(f))

#################################################
#coding=utf-8
import json
import requests

import os
import time
from os import walk
import CSVOP
from datetime import datetime
import pysolr
import math

class SolrClientObj:
    """Demo client for a Solr core, using raw HTTP (requests) and pysolr.

    Methods talk to ``http://127.0.0.1:8983/solr/mycore/`` (or the ``url``
    passed in) and print whatever the server or client library returns.
    All ``print`` calls use the single-argument form, which behaves the same
    on Python 2 and Python 3.
    """

    @staticmethod
    def _row_to_doc(item):
        """Convert one CSV row (a list of byte strings) into a Solr document dict."""
        return {
            'title': item[0].decode('GB2312'),
            'link': item[1],
            # column 2 (date) is not indexed
            'source': item[3].decode('GB2312'),
            'keyword': item[4].decode('GB2312'),
        }

    def test(self):
        """Post one hard-coded sample document to the update handler and print the response."""
        # NOTE(review): "*字段名*" looks like a placeholder for a real schema
        # field name -- confirm before running against a real core.
        data = {"add": {"doc": {"id": "100001", "*字段名*": u"我是一个大好人"}}}
        params = {"boost": 1.0, "overwrite": "true", "commitWithin": 1000}
        url = 'http://127.0.0.1:8983/solr/mycore/update?wt=json'
        headers = {"Content-Type": "application/json"}
        r = requests.post(url, json=data, params=params, headers=headers)
        print(r.text)

    def pysolr_Index_Data_FromCSV(self, csvfile, url='http://127.0.0.1:8983/solr/mycore/'):
        """Index the rows of a CSV file into Solr with a single pysolr ``add`` call.

        The first row is treated as the header and skipped. Rows that cannot
        be converted are reported and left out of the batch.

        :param csvfile: path of the CSV file, including the directory
        :param url: base URL of the Solr core
        :return: None
        """
        # NOTE(review): CSVOP.ReadCSV appears to return None for a missing
        # file; treat that as "no rows" instead of failing on iteration.
        rows = CSVOP.ReadCSV(csvfile) or []
        listdocs = []
        for index, item in enumerate(rows):
            if index == 0:  # the first row is the header
                continue
            try:
                listdocs.append(self._row_to_doc(item))
            except Exception as e:
                # Best-effort per row. Print the exception itself: e.message
                # is empty for errors such as UnicodeDecodeError.
                print(e)
        solr = pysolr.Solr(url, timeout=10)
        result = solr.add(listdocs)
        print(result)

    def Index_Data_FromCSV(self, csvfile):
        """Index the rows of a CSV file into Solr, one HTTP update request per row.

        The first row is treated as the header and skipped. A row that fails
        (conversion or request error) is reported and skipped; the remaining
        rows are still processed.

        :param csvfile: path of the CSV file, including the directory
        :return: None
        """
        # NOTE(review): CSVOP.ReadCSV appears to return None for a missing
        # file; treat that as "no rows" instead of failing on iteration.
        rows = CSVOP.ReadCSV(csvfile) or []
        params = {"boost": 1.0, "overwrite": "true", "commitWithin": 1000}
        url = 'http://127.0.0.1:8983/solr/mycore/update?wt=json'
        headers = {"Content-Type": "application/json"}
        for index, item in enumerate(rows):
            if index == 0:  # the first row is the header
                continue
            try:
                data = {"add": {"doc": self._row_to_doc(item)}}
                r = requests.post(url, json=data, params=params, headers=headers)
                print(r.text)
            except Exception as e:
                print(e)
            # Progress indicator: number of the row that was just handled.
            print(index)

    def Index_data(self):
        """Add two sample documents to the core via pysolr and print the result."""
        solr = pysolr.Solr('http://127.0.0.1:8983/solr/mycore/', timeout=10)

        # How you'd index data.
        result = solr.add([
            {
                "id": "doc_1",
                "title": "A test document",
            },
            {
                "id": "doc_2",
                "title": "The Banana: Tasty or Dangerous?",
            },
        ])
        print(result)

    def search_data(self, where='视频', url='http://127.0.0.1:8983/solr/mycore/'):
        """Search the ``title`` field through pysolr and print every returned hit.

        :param where: term to look for in ``title`` (previously ignored in
            favour of a hard-coded query)
        :param url: base URL of the Solr core
        :return: None
        """
        solr = pysolr.Solr(url, timeout=10)
        # start=10 skips the first ten hits; at most 30 documents are
        # requested, restricted to the four fields printed below.
        options = {'start': 10, 'rows': 30, 'fl': 'title,keyword,source,link'}
        result = solr.search('title:%s' % where, **options)
        # The total hit count is available as
        # result.raw_response['response']['numFound'].
        for item in result:
            print('keyword: %s' % item['keyword'])
            print('title: %s' % item['title'])
            print('source: %s' % item['source'])
            print('link: %s' % item['link'])
            print('    ')

    def delete_index_data(self, where, url='http://127.0.0.1:8983/solr/mycore/'):
        """Delete indexed documents that match a query and print the result.

        :param where: Solr query selecting the documents to delete;
            ``'*:*'`` deletes every document
        :param url: base URL of the Solr core
        :return: None
        """
        solr = pysolr.Solr(url, timeout=10)
        # To delete a single document by id instead: solr.delete(id=where)
        result = solr.delete(q=where)
        print(result)

# Script entry point: build the client and run one operation.
# The commented-out calls are alternative operations; uncomment as needed.
obj = SolrClientObj()
# obj.delete_index_data('*:*') # delete every indexed document
# obj.Index_data()
# obj.search_data()
# obj.delete_index_data('doc_1')
obj.search_data('视频')
# csvfile = 'D:/work/Solr/other/exportExcels/2017-07-07_info.csv'
# obj.pysolr_Index_Data_FromCSV(csvfile)

来源:https://www.cnblogs.com/shaosks/p/7845576.html

标签:python,solr
0
投稿

猜你喜欢

  • 详解mysql中if函数的正确使用姿势

    2024-01-23 00:37:57
  • FrontPage2002简明教程二:文字与图像的处理

    2008-09-17 11:13:00
  • PHP composer更新指定依赖包过程详细讲解

    2023-05-27 18:05:34
  • MYSQL定时清除备份数据的具体操作

    2024-01-21 02:47:19
  • 实用又漂亮的BootstrapValidator表单验证插件

    2024-05-09 10:39:52
  • Keras神经网络efficientnet模型搭建yolov3目标检测平台

    2021-10-08 11:45:33
  • Python OpenCV学习之图像滤波详解

    2021-09-17 18:49:36
  • Python MySQL数据库连接池组件pymysqlpool详解

    2024-01-22 23:59:17
  • Golang排列组合算法问题之全排列实现方法

    2023-07-14 14:16:19
  • CSS框架带来的效率提升

    2007-12-27 20:01:00
  • PHP读取文本文件并逐行输出该行使用最多的字符与对应次数的方法

    2024-05-11 10:09:43
  • PHP根据IP判断地区名信息的示例代码

    2023-09-10 14:05:55
  • Python数据分析Matplotlib 柱状图绘制

    2023-10-19 03:00:02
  • python pip如何手动安装二进制包

    2023-07-24 04:09:14
  • thinkPHP引入类的方法详解

    2024-06-05 09:45:55
  • CSS3的五个使用技巧[译]

    2009-02-19 13:01:00
  • Mobile Web下的编码设计

    2010-01-28 10:42:00
  • JSP 开发之 releaseSession的实例详解

    2023-06-14 11:49:03
  • Python字符串和文件操作常用函数分析

    2023-07-25 08:42:23
  • python分析apache访问日志脚本分享

    2021-08-12 04:20:09
  • asp之家 网络编程 m.aspxhome.com