Skip to content

Commit bc44f69

Browse files
Add file search functionality by adding a separate collection for files (#5)
* Add file search functionality by adding a separate collection for files
* Fix duplication error
* Use file path from env and reformat

---------

Co-authored-by: generall <[email protected]>
1 parent d92fdf6 commit bc44f69

File tree

12 files changed

+268
-43
lines changed

12 files changed

+268
-43
lines changed

code_search/config.py

+1
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313

1414
QDRANT_CODE_COLLECTION_NAME = "code-snippets-unixcoder"
1515
QDRANT_NLU_COLLECTION_NAME = "code-signatures"
16+
# Name of the Qdrant collection that stores whole source files.
QDRANT_FILE_COLLECTION_NAME = "code-files"
1617

1718
ENCODER_NAME = "all-MiniLM-L6-v2"
1819
ENCODER_SIZE = 384

code_search/get_file.py

+38
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
from typing import List
2+
3+
from qdrant_client import QdrantClient
4+
from qdrant_client.http import models
5+
6+
from code_search.config import QDRANT_URL, QDRANT_API_KEY, QDRANT_FILE_COLLECTION_NAME
7+
8+
class FileGet:
    """Fetch stored file payloads from the Qdrant file collection by path."""

    def __init__(self):
        """Bind the file collection name and open a Qdrant client."""
        self.collection_name = QDRANT_FILE_COLLECTION_NAME
        self.client = QdrantClient(url=QDRANT_URL, api_key=QDRANT_API_KEY)

    def get(self, path, limit=5) -> List[dict]:
        """Return the payloads of points whose "path" field equals *path*.

        Args:
            path: Value the "path" payload field must match.
            limit: Maximum number of points requested from the scroll call.

        Returns:
            The payload of every point returned by the scroll request.
        """
        path_condition = models.FieldCondition(
            key="path",
            match=models.MatchValue(value=path),
        )
        # scroll() returns (points, next_page_offset); only the first page
        # of points is used here.
        records, _next_offset = self.client.scroll(
            collection_name=self.collection_name,
            scroll_filter=models.Filter(must=[path_condition]),
            limit=limit,
        )
        return [record.payload for record in records]
29+
30+
31+
if __name__ == '__main__':
    # Manual smoke test: look up one known path and print each payload found.
    demo_path = "lib/collection/src/collection_manager/optimizers/indexing_optimizer.rs"

    for payload in FileGet().get(demo_path):
        print(payload)

code_search/index/file_uploader.py

+42
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
from pathlib import Path
2+
from qdrant_client import QdrantClient
3+
import json
4+
5+
from code_search.config import QDRANT_URL, QDRANT_API_KEY, DATA_DIR, QDRANT_FILE_COLLECTION_NAME
6+
7+
8+
def encode_and_upload():
    """Recreate the file collection and upload every record from rs_files.json.

    Reads ``DATA_DIR/rs_files.json`` (the file written by files_to_json.py),
    recreates the collection named by ``QDRANT_FILE_COLLECTION_NAME`` with an
    empty vectors config, and uploads each JSON record as a point payload.

    Raises:
        RuntimeError: If the input JSON file does not exist.
    """
    input_file = Path(DATA_DIR) / "rs_files.json"

    # Validate the input before creating a client or touching the collection:
    # recreate_collection below is destructive.
    if not input_file.exists():
        raise RuntimeError(
            f"File {input_file} does not exist. "
            "Run files_to_json.py first to generate it."
        )

    # The file is written with encoding='utf-8'; read it back the same way
    # instead of relying on the platform default encoding.
    with open(input_file, 'r', encoding='utf-8') as json_file:
        payload = json.load(json_file)

    qdrant_client = QdrantClient(
        QDRANT_URL,
        api_key=QDRANT_API_KEY,
    )
    collection_name = QDRANT_FILE_COLLECTION_NAME

    print(f"Recreating the collection {collection_name}")
    qdrant_client.recreate_collection(
        collection_name=collection_name,
        vectors_config={},
    )

    print(f"Storing data in the collection {collection_name}")
    qdrant_client.upload_collection(
        collection_name=collection_name,
        payload=payload,
        # The collection has no vectors configured, so every point gets an
        # empty vector mapping.
        vectors=[{}] * len(payload),
        ids=None,
        batch_size=256,
    )
39+
40+
41+
if __name__ == '__main__':
42+
encode_and_upload()

code_search/index/files_to_json.py

+41
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
import os.path
2+
import json
3+
from pathlib import Path
4+
5+
from code_search.config import DATA_DIR
6+
7+
8+
def process_file(root_dir, file_path):
    """Read one source file and describe it as an uploadable record.

    Args:
        root_dir: Directory that the reported path is made relative to.
        file_path: Path of the file to read.

    Returns:
        A dict with the keys "path" (path relative to *root_dir*, always with
        forward slashes), "code" (list of lines, line endings kept),
        "startline" (always 1) and "endline" (number of lines read).
    """
    # errors='ignore' drops undecodable bytes instead of aborting the export.
    with open(file_path, 'r', encoding='utf-8', errors='ignore') as file:
        code_lines = file.readlines()

    # os.path.relpath uses the OS separator; normalise to forward slashes so
    # the stored path is identical on every platform (it is later matched as
    # an exact string).
    relative_path = Path(os.path.relpath(file_path, root_dir)).as_posix()

    return {
        "path": relative_path,
        "code": code_lines,
        "startline": 1,
        "endline": len(code_lines),
    }
18+
19+
20+
def explore_directory(root_dir):
    """Collect a record for every ``.rs`` file found beneath *root_dir*.

    Walks the tree with ``os.walk`` and returns the ``process_file`` result
    for each file whose name ends in '.rs', in walk order.
    """
    collected = []
    for current_dir, _subdirs, names in os.walk(root_dir):
        rust_paths = (
            os.path.join(current_dir, name)
            for name in names
            if name.endswith('.rs')
        )
        collected.extend(process_file(root_dir, rust_path) for rust_path in rust_paths)
    return collected
28+
29+
30+
def main():
    """Export every ``.rs`` file under ``$QDRANT_PATH`` to ``DATA_DIR/rs_files.json``.

    Raises:
        RuntimeError: If the QDRANT_PATH environment variable is unset or
            empty, or does not point to an existing directory.
    """
    folder_path = os.getenv('QDRANT_PATH')

    # Fail early with a clear message: os.walk raises an opaque TypeError on
    # None and silently yields nothing for a missing directory, which would
    # overwrite the output with an empty list.
    if not folder_path:
        raise RuntimeError(
            "Environment variable QDRANT_PATH is not set; "
            "point it at the source tree to export."
        )
    if not os.path.isdir(folder_path):
        raise RuntimeError(f"QDRANT_PATH {folder_path!r} is not a directory")

    output_file = Path(DATA_DIR) / "rs_files.json"

    files_data = explore_directory(folder_path)

    with open(output_file, 'w', encoding='utf-8') as json_file:
        json.dump(files_data, json_file, indent=2)
38+
39+
40+
if __name__ == "__main__":
41+
main()

code_search/service.py

+8
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,12 @@
55

66
from code_search.config import ROOT_DIR
77
from code_search.searcher import CombinedSearcher
8+
from code_search.get_file import FileGet
89

910
app = FastAPI()
1011

1112
searcher = CombinedSearcher()
13+
get_file = FileGet()
1214

1315

1416
@app.get("/api/search")
@@ -17,6 +19,12 @@ async def search(query: str):
1719
"result": searcher.search(query, limit=5)
1820
}
1921

22+
@app.get("/api/file")
def file(path: str):
    """Return the stored file record(s) whose path equals *path*.

    Args:
        path: Value of the ``path`` query parameter, passed to ``get_file.get``.

    Returns:
        A dict with a single "result" key holding the list from ``get_file.get``.
    """
    # Declared with plain `def` rather than `async def`: the Qdrant lookup is
    # a blocking synchronous call, so FastAPI should run it in its threadpool
    # instead of on the event loop.
    return {
        "result": get_file.get(path)
    }
27+
2028

2129
app.mount("/", StaticFiles(directory=os.path.join(ROOT_DIR, 'frontend', 'dist'), html=True))
2230

frontend/src/api/constants.ts

+2
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
11
const API_V1 = "api/";
22

33
export const SEARCH_URL = `${API_V1}search`;
4+
5+
export const FILE_URL = `${API_V1}file`;

frontend/src/api/file.ts

+13
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
import { Axios } from "./axios";
2+
import { FILE_URL } from "./constants";
3+
4+
/** Query parameters accepted by the file endpoint. */
export type PathRequest = {
  path: string;
};

/**
 * Request the stored file whose path matches `request.path`.
 *
 * Sends a GET to FILE_URL with `path` as the only query parameter and
 * returns the promise produced by the Axios instance.
 */
export const getFileResult = (request: PathRequest) => {
  // The parameter used to be named `PathRequest`, shadowing the type above.
  const { path } = request;
  return Axios().get(FILE_URL, { params: { path } });
};

frontend/src/components/CodeContainer/CodeContainer.module.css

+2
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,8 @@
3636
.codeLoad {
3737
padding-right: 1rem;
3838
padding-left: 1rem;
39+
height: 24px;
40+
width: 56px;
3941
color: #646d76;
4042
background-color: #bddfff;
4143
display: flex;

frontend/src/components/CodeContainer/index.tsx

+71-41
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
import { Box, Button, Image, ThemeIcon, Tooltip } from "@mantine/core";
1+
import { Box, Button, Image, Loader, ThemeIcon, Tooltip } from "@mantine/core";
22
import classes from "./CodeContainer.module.css";
33
import { Highlight, themes } from "prism-react-renderer";
44
import {
@@ -7,6 +7,8 @@ import {
77
IconFoldUp,
88
} from "@tabler/icons-react";
99
import useMountedState from "@/hooks/useMountedState";
10+
import { useGetFile } from "@/hooks/useGetFile";
11+
import { useEffect } from "react";
1012

1113
type CodeContainerProps = {
1214
code_type: string;
@@ -16,8 +18,6 @@ type CodeContainerProps = {
1618
module: string;
1719
snippet: string;
1820
struct_name: string;
19-
upper_lines: string;
20-
lower_lines: string;
2121
};
2222
docstring: string | null;
2323
line: number;
@@ -35,42 +35,61 @@ const loadCount = 10;
3535
export function CodeContainer(props: CodeContainerProps) {
3636
const { context, line_from, sub_matches, line_to } = props;
3737
const [codeLineFrom, setCodeLineFrom] = useMountedState(line_from);
38-
const [codeLineTo, setCodeLineTo] = useMountedState(0);
38+
const [codeLineTo, setCodeLineTo] = useMountedState(line_to);
3939
const [code, setCode] = useMountedState(props.context.snippet);
40+
const { data, error, loading, getFile } = useGetFile();
41+
const [inStack, setInStack] = useMountedState<
42+
"loadUpperCode" | "loadLowerCode" | null
43+
>(null);
4044

4145
const loadUpperCode = () => {
42-
const upperCodeArray = context.upper_lines.split("\n");
43-
const upperCode = upperCodeArray
44-
.slice(
45-
codeLineFrom - loadCount + 1 > 0 ? codeLineFrom - loadCount + 1 : 0,
46-
codeLineFrom
47-
)
48-
.join("\n");
49-
setCodeLineFrom((number) => {
50-
return number - loadCount > 0 ? number - loadCount : 1;
51-
});
52-
setCode(`${upperCode}${code}`);
46+
if (!data) {
47+
getFile(context.file_path);
48+
setInStack("loadUpperCode");
49+
}
50+
if (data) {
51+
const upperCodeArray = data.result[0].code;
52+
const upperCode = upperCodeArray
53+
.slice(
54+
codeLineFrom - loadCount + 1 > 0 ? codeLineFrom - loadCount + 1 : 0,
55+
codeLineFrom
56+
)
57+
.join("");
58+
setCodeLineFrom((number) => {
59+
return number - loadCount > 0 ? number - loadCount : 1;
60+
});
61+
setCode(`${upperCode}${code}`);
62+
}
5363
};
5464

5565
const loadLowerCode = () => {
56-
const lowerCodeArray = context.lower_lines.split("\n");
57-
if (lowerCodeArray.length > codeLineTo + loadCount) {
66+
if (!data) {
67+
getFile(context.file_path);
68+
setInStack("loadLowerCode");
69+
}
70+
if (data) {
71+
const lowerCodeArray = data.result[0].code;
5872
const lowerCode = lowerCodeArray
59-
.slice(codeLineTo, codeLineTo + loadCount + 1)
60-
.join("\n");
73+
.slice(codeLineTo, codeLineTo + loadCount)
74+
.join("");
6175
setCodeLineTo((number) => {
6276
return number + loadCount;
6377
});
6478
setCode(`${code}${lowerCode}`);
65-
} else {
66-
const lowerCode = lowerCodeArray
67-
.slice(codeLineTo, lowerCodeArray.length)
68-
.join("\n");
69-
setCodeLineTo(lowerCodeArray.length);
70-
setCode(`${code}${lowerCode}`);
7179
}
7280
};
7381

82+
useEffect(() => {
83+
if (inStack === "loadUpperCode" && data) {
84+
loadUpperCode();
85+
setInStack(null);
86+
}
87+
if (inStack === "loadLowerCode" && data) {
88+
loadLowerCode();
89+
setInStack(null);
90+
}
91+
}, [data]);
92+
7493
return (
7594
<Box
7695
className={classes.wrapper}
@@ -131,12 +150,18 @@ export function CodeContainer(props: CodeContainerProps) {
131150
withArrow
132151
>
133152
<span className={classes.codeLoad} onClick={loadUpperCode}>
134-
<IconFoldUp />
153+
{loading && inStack === "loadUpperCode" ? (
154+
<Loader type="oval" size="xs" />
155+
) : (
156+
<IconFoldUp />
157+
)}
135158
</span>
136159
</Tooltip>
137160
<div className={classes.codeLine}>
138161
<span className={classes.codeNumber}>
139-
@@ {1} - {codeLineFrom - 1} of {context.file_name}
162+
{error
163+
? error
164+
: `@@ 1 - ${codeLineFrom - 1} of ${context.file_name}`}
140165
</span>
141166
</div>
142167
</div>
@@ -174,10 +199,7 @@ export function CodeContainer(props: CodeContainerProps) {
174199
))}
175200
<div
176201
style={
177-
codeLineTo === context.lower_lines.split("\n").length ||
178-
context.lower_lines === undefined ||
179-
context.lower_lines === null ||
180-
context.lower_lines === ""
202+
data?.result[0].endline && codeLineTo >= data?.result[0].endline
181203
? { display: "none" }
182204
: {
183205
display: "flex",
@@ -191,12 +213,12 @@ export function CodeContainer(props: CodeContainerProps) {
191213
}
192214
>
193215
<Tooltip
194-
label={`Load ${line_to + codeLineTo + 2} to ${
195-
line_to + codeLineTo + loadCount+1 <
196-
context.lower_lines.split("\n").length + line_to
197-
? line_to + codeLineTo + loadCount+1
198-
: context.lower_lines.split("\n").length + line_to
199-
}`}
216+
label={`Load ${codeLineTo + 2} to ${
217+
data?.result[0].endline &&
218+
data?.result[0].endline < codeLineTo + loadCount + 2
219+
? data?.result[0].endline + 1
220+
: codeLineTo + loadCount + 2
221+
} of file`}
200222
withArrow
201223
>
202224
<span
@@ -206,14 +228,22 @@ export function CodeContainer(props: CodeContainerProps) {
206228
}}
207229
onClick={loadLowerCode}
208230
>
209-
<IconFoldDown />
231+
{loading && inStack === "loadLowerCode" ? (
232+
<Loader type="oval" size="xs" />
233+
) : (
234+
<IconFoldDown />
235+
)}
210236
</span>
211237
</Tooltip>
212238
<div className={classes.codeLine}>
213239
<span className={classes.codeNumber}>
214-
@@ {line_to + codeLineTo + 2} -{" "}
215-
{context.lower_lines.split("\n").length + line_to} of{" "}
216-
{context.file_name}
240+
{error
241+
? error
242+
: `@@ ${codeLineTo + 2} - ${
243+
data?.result[0].endline
244+
? data?.result[0].endline + 1
245+
: "end"
246+
} of ${context.file_name}`}
217247
</span>
218248
</div>
219249
</div>

0 commit comments

Comments
 (0)